drbd: Fixed processing of disk-barrier, disk-flushes and disk-drain
[deliverable/linux.git] drivers/block/drbd/drbd_receiver.c
1/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
26#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
31#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
39#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
44#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
47#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
51struct packet_info {
52 enum drbd_packet cmd;
53 unsigned int size;
54 unsigned int vnr;
55 void *data;
56};
57
58enum finish_epoch {
59 FE_STILL_LIVE,
60 FE_DESTROYED,
61 FE_RECYCLED,
62};
63
64static int drbd_do_features(struct drbd_tconn *tconn);
65static int drbd_do_auth(struct drbd_tconn *tconn);
66static int drbd_disconnected(struct drbd_conf *mdev);
67
68static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event);
69static int e_end_block(struct drbd_work *, int);
70
71
72#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
73
74/*
75 * some helper functions to deal with single linked page lists,
76 * page->private being our "next" pointer.
77 */
78
79/* If at least n pages are linked at head, get n pages off.
80 * Otherwise, don't modify head, and return NULL.
81 * Locking is the responsibility of the caller.
82 */
83static struct page *page_chain_del(struct page **head, int n)
84{
85 struct page *page;
86 struct page *tmp;
87
88 BUG_ON(!n);
89 BUG_ON(!head);
90
91 page = *head;
92
93 if (!page)
94 return NULL;
95
96 while (page) {
97 tmp = page_chain_next(page);
98 if (--n == 0)
99 break; /* found sufficient pages */
100 if (tmp == NULL)
101 /* insufficient pages, don't use any of them. */
102 return NULL;
103 page = tmp;
104 }
105
106 /* add end of list marker for the returned list */
107 set_page_private(page, 0);
108 /* actual return value, and adjustment of head */
109 page = *head;
110 *head = tmp;
111 return page;
112}
113
114/* may be used outside of locks to find the tail of a (usually short)
115 * "private" page chain, before adding it back to a global chain head
116 * with page_chain_add() under a spinlock. */
117static struct page *page_chain_tail(struct page *page, int *len)
118{
119 struct page *tmp;
120 int i = 1;
121 while ((tmp = page_chain_next(page)))
122 ++i, page = tmp;
123 if (len)
124 *len = i;
125 return page;
126}
127
128static int page_chain_free(struct page *page)
129{
130 struct page *tmp;
131 int i = 0;
132 page_chain_for_each_safe(page, tmp) {
133 put_page(page);
134 ++i;
135 }
136 return i;
137}
138
139static void page_chain_add(struct page **head,
140 struct page *chain_first, struct page *chain_last)
141{
142#if 1
143 struct page *tmp;
144 tmp = page_chain_tail(chain_first, NULL);
145 BUG_ON(tmp != chain_last);
146#endif
147
148 /* add chain to head */
149 set_page_private(chain_last, (unsigned long)*head);
150 *head = chain_first;
151}
152
153static struct page *__drbd_alloc_pages(struct drbd_conf *mdev,
154 unsigned int number)
155{
156 struct page *page = NULL;
157 struct page *tmp = NULL;
158 unsigned int i = 0;
159
160 /* Yes, testing drbd_pp_vacant outside the lock is racy.
161 * So what. It saves a spin_lock. */
162 if (drbd_pp_vacant >= number) {
163 spin_lock(&drbd_pp_lock);
164 page = page_chain_del(&drbd_pp_pool, number);
165 if (page)
166 drbd_pp_vacant -= number;
167 spin_unlock(&drbd_pp_lock);
168 if (page)
169 return page;
170 }
171
172 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
173 * "criss-cross" setup, that might cause write-out on some other DRBD,
174 * which in turn might block on the other node at this very place. */
175 for (i = 0; i < number; i++) {
176 tmp = alloc_page(GFP_TRY);
177 if (!tmp)
178 break;
179 set_page_private(tmp, (unsigned long)page);
180 page = tmp;
181 }
182
183 if (i == number)
184 return page;
185
186 /* Not enough pages immediately available this time.
187 * No need to jump around here, drbd_alloc_pages will retry this
188 * function "soon". */
189 if (page) {
190 tmp = page_chain_tail(page, NULL);
191 spin_lock(&drbd_pp_lock);
192 page_chain_add(&drbd_pp_pool, page, tmp);
193 drbd_pp_vacant += i;
194 spin_unlock(&drbd_pp_lock);
195 }
196 return NULL;
197}
198
199static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev,
200 struct list_head *to_be_freed)
201{
202 struct drbd_peer_request *peer_req;
203 struct list_head *le, *tle;
204
205 /* The EEs are always appended to the end of the list. Since
206 they are sent in order over the wire, they have to finish
207 in order. As soon as we see the first not finished we can
208 stop to examine the list... */
209
210 list_for_each_safe(le, tle, &mdev->net_ee) {
211 peer_req = list_entry(le, struct drbd_peer_request, w.list);
212 if (drbd_peer_req_has_active_page(peer_req))
213 break;
214 list_move(le, to_be_freed);
215 }
216}
217
218static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
219{
220 LIST_HEAD(reclaimed);
221 struct drbd_peer_request *peer_req, *t;
222
223 spin_lock_irq(&mdev->tconn->req_lock);
224 reclaim_finished_net_peer_reqs(mdev, &reclaimed);
225 spin_unlock_irq(&mdev->tconn->req_lock);
226
227 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
228 drbd_free_net_peer_req(mdev, peer_req);
229}
230
231/**
232 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
233 * @mdev: DRBD device.
234 * @number: number of pages requested
235 * @retry: whether to retry, if not enough pages are available right now
236 *
237 * Tries to allocate number pages, first from our own page pool, then from
238 * the kernel, unless this allocation would exceed the max_buffers setting.
239 * Possibly retry until DRBD frees sufficient pages somewhere else.
240 *
241 * Returns a page chain linked via page->private.
242 */
243struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number,
244 bool retry)
245{
246 struct page *page = NULL;
247 struct net_conf *nc;
248 DEFINE_WAIT(wait);
249 int mxb;
250
251 /* Yes, we may run up to @number over max_buffers. If we
252 * follow it strictly, the admin will get it wrong anyways. */
253 rcu_read_lock();
254 nc = rcu_dereference(mdev->tconn->net_conf);
255 mxb = nc ? nc->max_buffers : 1000000;
256 rcu_read_unlock();
257
258 if (atomic_read(&mdev->pp_in_use) < mxb)
259 page = __drbd_alloc_pages(mdev, number);
260
261 while (page == NULL) {
262 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
263
264 drbd_kick_lo_and_reclaim_net(mdev);
265
266 if (atomic_read(&mdev->pp_in_use) < mxb) {
267 page = __drbd_alloc_pages(mdev, number);
268 if (page)
269 break;
270 }
271
272 if (!retry)
273 break;
274
275 if (signal_pending(current)) {
276 dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
277 break;
278 }
279
280 schedule();
281 }
282 finish_wait(&drbd_pp_wait, &wait);
283
284 if (page)
285 atomic_add(number, &mdev->pp_in_use);
286 return page;
287}
288
c37c8ecf 289/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
87eeee41 290 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
45bb912b
LE
291 * Either links the page chain back to the global pool,
292 * or returns all pages to the system. */
5cc287e0 293static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net)
b411b363 294{
435f0740 295 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
b411b363 296 int i;
435f0740 297
81a5d60e 298 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
45bb912b
LE
299 i = page_chain_free(page);
300 else {
301 struct page *tmp;
302 tmp = page_chain_tail(page, &i);
303 spin_lock(&drbd_pp_lock);
304 page_chain_add(&drbd_pp_pool, page, tmp);
305 drbd_pp_vacant += i;
306 spin_unlock(&drbd_pp_lock);
b411b363 307 }
435f0740 308 i = atomic_sub_return(i, a);
45bb912b 309 if (i < 0)
435f0740
LE
310 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
311 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
b411b363
PR
312 wake_up(&drbd_pp_wait);
313}
314
315/*
316You need to hold the req_lock:
317 _drbd_wait_ee_list_empty()
318
319You must not have the req_lock:
3967deb1 320 drbd_free_peer_req()
0db55363 321 drbd_alloc_peer_req()
7721f567 322 drbd_free_peer_reqs()
b411b363 323 drbd_ee_fix_bhs()
a990be46 324 drbd_finish_peer_reqs()
b411b363
PR
325 drbd_clear_done_ee()
326 drbd_wait_ee_list_empty()
327*/
328
f6ffca9f 329struct drbd_peer_request *
0db55363
AG
330drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector,
331 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
b411b363 332{
db830c46 333 struct drbd_peer_request *peer_req;
b411b363 334 struct page *page;
45bb912b 335 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
b411b363 336
0cf9d27e 337 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
b411b363
PR
338 return NULL;
339
db830c46
AG
340 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
341 if (!peer_req) {
b411b363 342 if (!(gfp_mask & __GFP_NOWARN))
0db55363 343 dev_err(DEV, "%s: allocation failed\n", __func__);
b411b363
PR
344 return NULL;
345 }
346
c37c8ecf 347 page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
45bb912b
LE
348 if (!page)
349 goto fail;
b411b363 350
db830c46
AG
351 drbd_clear_interval(&peer_req->i);
352 peer_req->i.size = data_size;
353 peer_req->i.sector = sector;
354 peer_req->i.local = false;
355 peer_req->i.waiting = false;
356
357 peer_req->epoch = NULL;
a21e9298 358 peer_req->w.mdev = mdev;
db830c46
AG
359 peer_req->pages = page;
360 atomic_set(&peer_req->pending_bios, 0);
361 peer_req->flags = 0;
9a8e7753
AG
362 /*
363 * The block_id is opaque to the receiver. It is not endianness
364 * converted, and sent back to the sender unchanged.
365 */
db830c46 366 peer_req->block_id = id;
b411b363 367
db830c46 368 return peer_req;
b411b363 369
45bb912b 370 fail:
db830c46 371 mempool_free(peer_req, drbd_ee_mempool);
b411b363
PR
372 return NULL;
373}
374
3967deb1 375void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
f6ffca9f 376 int is_net)
b411b363 377{
db830c46
AG
378 if (peer_req->flags & EE_HAS_DIGEST)
379 kfree(peer_req->digest);
5cc287e0 380 drbd_free_pages(mdev, peer_req->pages, is_net);
db830c46
AG
381 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
382 D_ASSERT(drbd_interval_empty(&peer_req->i));
383 mempool_free(peer_req, drbd_ee_mempool);
b411b363
PR
384}
385
7721f567 386int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list)
b411b363
PR
387{
388 LIST_HEAD(work_list);
db830c46 389 struct drbd_peer_request *peer_req, *t;
b411b363 390 int count = 0;
435f0740 391 int is_net = list == &mdev->net_ee;
b411b363 392
87eeee41 393 spin_lock_irq(&mdev->tconn->req_lock);
b411b363 394 list_splice_init(list, &work_list);
87eeee41 395 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 396
db830c46 397 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
3967deb1 398 __drbd_free_peer_req(mdev, peer_req, is_net);
b411b363
PR
399 count++;
400 }
401 return count;
402}
403
a990be46
AG
404/*
405 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
b411b363 406 */
a990be46 407static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
b411b363
PR
408{
409 LIST_HEAD(work_list);
410 LIST_HEAD(reclaimed);
db830c46 411 struct drbd_peer_request *peer_req, *t;
e2b3032b 412 int err = 0;
b411b363 413
87eeee41 414 spin_lock_irq(&mdev->tconn->req_lock);
a990be46 415 reclaim_finished_net_peer_reqs(mdev, &reclaimed);
b411b363 416 list_splice_init(&mdev->done_ee, &work_list);
87eeee41 417 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 418
db830c46 419 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
3967deb1 420 drbd_free_net_peer_req(mdev, peer_req);
b411b363
PR
421
422 /* possible callbacks here:
7be8da07 423 * e_end_block, and e_end_resync_block, e_send_discard_write.
b411b363
PR
424 * all ignore the last argument.
425 */
db830c46 426 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
e2b3032b
AG
427 int err2;
428
b411b363 429 /* list_del not necessary, next/prev members not touched */
e2b3032b
AG
430 err2 = peer_req->w.cb(&peer_req->w, !!err);
431 if (!err)
432 err = err2;
3967deb1 433 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
434 }
435 wake_up(&mdev->ee_wait);
436
e2b3032b 437 return err;
b411b363
PR
438}
439
d4da1537
AG
440static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
441 struct list_head *head)
b411b363
PR
442{
443 DEFINE_WAIT(wait);
444
445 /* avoids spin_lock/unlock
446 * and calling prepare_to_wait in the fast path */
447 while (!list_empty(head)) {
448 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
87eeee41 449 spin_unlock_irq(&mdev->tconn->req_lock);
7eaceacc 450 io_schedule();
b411b363 451 finish_wait(&mdev->ee_wait, &wait);
87eeee41 452 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
453 }
454}
455
d4da1537
AG
456static void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
457 struct list_head *head)
b411b363 458{
87eeee41 459 spin_lock_irq(&mdev->tconn->req_lock);
b411b363 460 _drbd_wait_ee_list_empty(mdev, head);
87eeee41 461 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
462}
463
464/* see also kernel_accept; which is only present since 2.6.18.
465 * also we want to log which part of it failed, exactly */
7653620d 466static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
b411b363
PR
467{
468 struct sock *sk = sock->sk;
469 int err = 0;
470
471 *what = "listen";
472 err = sock->ops->listen(sock, 5);
473 if (err < 0)
474 goto out;
475
476 *what = "sock_create_lite";
477 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
478 newsock);
479 if (err < 0)
480 goto out;
481
482 *what = "accept";
483 err = sock->ops->accept(sock, *newsock, 0);
484 if (err < 0) {
485 sock_release(*newsock);
486 *newsock = NULL;
487 goto out;
488 }
489 (*newsock)->ops = sock->ops;
dd9b3604 490 __module_get((*newsock)->ops->owner);
b411b363
PR
491
492out:
493 return err;
494}
495
dbd9eea0 496static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
b411b363
PR
497{
498 mm_segment_t oldfs;
499 struct kvec iov = {
500 .iov_base = buf,
501 .iov_len = size,
502 };
503 struct msghdr msg = {
504 .msg_iovlen = 1,
505 .msg_iov = (struct iovec *)&iov,
506 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
507 };
508 int rv;
509
510 oldfs = get_fs();
511 set_fs(KERNEL_DS);
512 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
513 set_fs(oldfs);
514
515 return rv;
516}
517
de0ff338 518static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
b411b363
PR
519{
520 mm_segment_t oldfs;
521 struct kvec iov = {
522 .iov_base = buf,
523 .iov_len = size,
524 };
525 struct msghdr msg = {
526 .msg_iovlen = 1,
527 .msg_iov = (struct iovec *)&iov,
528 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
529 };
530 int rv;
531
532 oldfs = get_fs();
533 set_fs(KERNEL_DS);
534
535 for (;;) {
de0ff338 536 rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
b411b363
PR
537 if (rv == size)
538 break;
539
540 /* Note:
541 * ECONNRESET other side closed the connection
542 * ERESTARTSYS (on sock) we got a signal
543 */
544
545 if (rv < 0) {
546 if (rv == -ECONNRESET)
de0ff338 547 conn_info(tconn, "sock was reset by peer\n");
b411b363 548 else if (rv != -ERESTARTSYS)
de0ff338 549 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
b411b363
PR
550 break;
551 } else if (rv == 0) {
de0ff338 552 conn_info(tconn, "sock was shut down by peer\n");
b411b363
PR
553 break;
554 } else {
555 /* signal came in, or peer/link went down,
556 * after we read a partial message
557 */
558 /* D_ASSERT(signal_pending(current)); */
559 break;
560 }
561 };
562
563 set_fs(oldfs);
564
565 if (rv != size)
bbeb641c 566 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
b411b363
PR
567
568 return rv;
569}
570
c6967746
AG
571static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
572{
573 int err;
574
575 err = drbd_recv(tconn, buf, size);
576 if (err != size) {
577 if (err >= 0)
578 err = -EIO;
579 } else
580 err = 0;
581 return err;
582}
583
a5c31904
AG
584static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
585{
586 int err;
587
588 err = drbd_recv_all(tconn, buf, size);
589 if (err && !signal_pending(current))
590 conn_warn(tconn, "short read (expected size %d)\n", (int)size);
591 return err;
592}
593
5dbf1673
LE
594/* quoting tcp(7):
595 * On individual connections, the socket buffer size must be set prior to the
596 * listen(2) or connect(2) calls in order to have it take effect.
597 * This is our wrapper to do so.
598 */
599static void drbd_setbufsize(struct socket *sock, unsigned int snd,
600 unsigned int rcv)
601{
602 /* open coded SO_SNDBUF, SO_RCVBUF */
603 if (snd) {
604 sock->sk->sk_sndbuf = snd;
605 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
606 }
607 if (rcv) {
608 sock->sk->sk_rcvbuf = rcv;
609 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
610 }
611}
612
eac3e990 613static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
b411b363
PR
614{
615 const char *what;
616 struct socket *sock;
617 struct sockaddr_in6 src_in6;
44ed167d
PR
618 struct sockaddr_in6 peer_in6;
619 struct net_conf *nc;
620 int err, peer_addr_len, my_addr_len;
69ef82de 621 int sndbuf_size, rcvbuf_size, connect_int;
b411b363
PR
622 int disconnect_on_error = 1;
623
44ed167d
PR
624 rcu_read_lock();
625 nc = rcu_dereference(tconn->net_conf);
626 if (!nc) {
627 rcu_read_unlock();
b411b363 628 return NULL;
44ed167d 629 }
44ed167d
PR
630 sndbuf_size = nc->sndbuf_size;
631 rcvbuf_size = nc->rcvbuf_size;
69ef82de 632 connect_int = nc->connect_int;
089c075d 633 rcu_read_unlock();
44ed167d 634
089c075d
AG
635 my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
636 memcpy(&src_in6, &tconn->my_addr, my_addr_len);
44ed167d 637
089c075d 638 if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
44ed167d
PR
639 src_in6.sin6_port = 0;
640 else
641 ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
642
089c075d
AG
643 peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6));
644 memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);
b411b363
PR
645
646 what = "sock_create_kern";
44ed167d
PR
647 err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
648 SOCK_STREAM, IPPROTO_TCP, &sock);
b411b363
PR
649 if (err < 0) {
650 sock = NULL;
651 goto out;
652 }
653
654 sock->sk->sk_rcvtimeo =
69ef82de 655 sock->sk->sk_sndtimeo = connect_int * HZ;
44ed167d 656 drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
b411b363
PR
657
658 /* explicitly bind to the configured IP as source IP
659 * for the outgoing connections.
660 * This is needed for multihomed hosts and to be
661 * able to use lo: interfaces for drbd.
662 * Make sure to use 0 as port number, so linux selects
663 * a free one dynamically.
664 */
b411b363 665 what = "bind before connect";
44ed167d 666 err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
b411b363
PR
667 if (err < 0)
668 goto out;
669
670 /* connect may fail, peer not yet available.
671 * stay C_WF_CONNECTION, don't go Disconnecting! */
672 disconnect_on_error = 0;
673 what = "connect";
44ed167d 674 err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
b411b363
PR
675
676out:
677 if (err < 0) {
678 if (sock) {
679 sock_release(sock);
680 sock = NULL;
681 }
682 switch (-err) {
683 /* timeout, busy, signal pending */
684 case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
685 case EINTR: case ERESTARTSYS:
686 /* peer not (yet) available, network problem */
687 case ECONNREFUSED: case ENETUNREACH:
688 case EHOSTDOWN: case EHOSTUNREACH:
689 disconnect_on_error = 0;
690 break;
691 default:
eac3e990 692 conn_err(tconn, "%s failed, err = %d\n", what, err);
b411b363
PR
693 }
694 if (disconnect_on_error)
bbeb641c 695 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 696 }
44ed167d 697
b411b363
PR
698 return sock;
699}
700
7653620d 701static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
b411b363 702{
44ed167d 703 int timeo, err, my_addr_len;
69ef82de 704 int sndbuf_size, rcvbuf_size, connect_int;
b411b363 705 struct socket *s_estab = NULL, *s_listen;
44ed167d
PR
706 struct sockaddr_in6 my_addr;
707 struct net_conf *nc;
b411b363
PR
708 const char *what;
709
44ed167d
PR
710 rcu_read_lock();
711 nc = rcu_dereference(tconn->net_conf);
712 if (!nc) {
713 rcu_read_unlock();
b411b363 714 return NULL;
44ed167d 715 }
44ed167d
PR
716 sndbuf_size = nc->sndbuf_size;
717 rcvbuf_size = nc->rcvbuf_size;
69ef82de 718 connect_int = nc->connect_int;
44ed167d 719 rcu_read_unlock();
b411b363 720
089c075d
AG
721 my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
722 memcpy(&my_addr, &tconn->my_addr, my_addr_len);
723
b411b363 724 what = "sock_create_kern";
44ed167d 725 err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
b411b363
PR
726 SOCK_STREAM, IPPROTO_TCP, &s_listen);
727 if (err) {
728 s_listen = NULL;
729 goto out;
730 }
731
69ef82de 732 timeo = connect_int * HZ;
b411b363
PR
733 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
734
735 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
736 s_listen->sk->sk_rcvtimeo = timeo;
737 s_listen->sk->sk_sndtimeo = timeo;
44ed167d 738 drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
b411b363
PR
739
740 what = "bind before listen";
44ed167d 741 err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
b411b363
PR
742 if (err < 0)
743 goto out;
744
7653620d 745 err = drbd_accept(&what, s_listen, &s_estab);
b411b363
PR
746
747out:
748 if (s_listen)
749 sock_release(s_listen);
750 if (err < 0) {
751 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
7653620d 752 conn_err(tconn, "%s failed, err = %d\n", what, err);
bbeb641c 753 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
754 }
755 }
b411b363
PR
756
757 return s_estab;
758}
759
e658983a 760static int decode_header(struct drbd_tconn *, void *, struct packet_info *);
b411b363 761
9f5bdc33
AG
762static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock,
763 enum drbd_packet cmd)
764{
765 if (!conn_prepare_command(tconn, sock))
766 return -EIO;
e658983a 767 return conn_send_command(tconn, sock, cmd, 0, NULL, 0);
b411b363
PR
768}
769
9f5bdc33 770static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock)
b411b363 771{
9f5bdc33
AG
772 unsigned int header_size = drbd_header_size(tconn);
773 struct packet_info pi;
774 int err;
b411b363 775
9f5bdc33
AG
776 err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0);
777 if (err != header_size) {
778 if (err >= 0)
779 err = -EIO;
780 return err;
781 }
782 err = decode_header(tconn, tconn->data.rbuf, &pi);
783 if (err)
784 return err;
785 return pi.cmd;
b411b363
PR
786}
787
788/**
789 * drbd_socket_okay() - Free the socket if its connection is not okay
b411b363
PR
790 * @sock: pointer to the pointer to the socket.
791 */
dbd9eea0 792static int drbd_socket_okay(struct socket **sock)
b411b363
PR
793{
794 int rr;
795 char tb[4];
796
797 if (!*sock)
81e84650 798 return false;
b411b363 799
dbd9eea0 800 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
b411b363
PR
801
802 if (rr > 0 || rr == -EAGAIN) {
81e84650 803 return true;
b411b363
PR
804 } else {
805 sock_release(*sock);
806 *sock = NULL;
81e84650 807 return false;
b411b363
PR
808 }
809}
2325eb66
PR
810/* Gets called if a connection is established, or if a new minor gets created
811 in a connection */
c141ebda 812int drbd_connected(struct drbd_conf *mdev)
907599e0 813{
0829f5ed 814 int err;
907599e0
PR
815
816 atomic_set(&mdev->packet_seq, 0);
817 mdev->peer_seq = 0;
818
8410da8f
PR
819 mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
820 &mdev->tconn->cstate_mutex :
821 &mdev->own_state_mutex;
822
0829f5ed
AG
823 err = drbd_send_sync_param(mdev);
824 if (!err)
825 err = drbd_send_sizes(mdev, 0, 0);
826 if (!err)
827 err = drbd_send_uuids(mdev);
828 if (!err)
43de7c85 829 err = drbd_send_current_state(mdev);
907599e0
PR
830 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
831 clear_bit(RESIZE_PENDING, &mdev->flags);
8b924f1d 832 mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
0829f5ed 833 return err;
907599e0
PR
834}
835
b411b363
PR
836/*
837 * return values:
838 * 1 yes, we have a valid connection
839 * 0 oops, did not work out, please try again
840 * -1 peer talks different language,
841 * no point in trying again, please go standalone.
842 * -2 We do not have a network config...
843 */
81fa2e67 844static int conn_connect(struct drbd_tconn *tconn)
b411b363 845{
7da35862 846 struct drbd_socket sock, msock;
c141ebda 847 struct drbd_conf *mdev;
44ed167d 848 struct net_conf *nc;
c141ebda 849 int vnr, timeout, try, h, ok;
08b165ba 850 bool discard_my_data;
b411b363 851
bbeb641c 852 if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
b411b363
PR
853 return -2;
854
7da35862
PR
855 mutex_init(&sock.mutex);
856 sock.sbuf = tconn->data.sbuf;
857 sock.rbuf = tconn->data.rbuf;
858 sock.socket = NULL;
859 mutex_init(&msock.mutex);
860 msock.sbuf = tconn->meta.sbuf;
861 msock.rbuf = tconn->meta.rbuf;
862 msock.socket = NULL;
863
907599e0 864 clear_bit(DISCARD_CONCURRENT, &tconn->flags);
0916e0e3
AG
865
866 /* Assume that the peer only understands protocol 80 until we know better. */
867 tconn->agreed_pro_version = 80;
b411b363 868
b411b363 869 do {
2bf89621
AG
870 struct socket *s;
871
b411b363
PR
872 for (try = 0;;) {
873 /* 3 tries, this should take less than a second! */
907599e0 874 s = drbd_try_connect(tconn);
b411b363
PR
875 if (s || ++try >= 3)
876 break;
877 /* give the other side time to call bind() & listen() */
20ee6390 878 schedule_timeout_interruptible(HZ / 10);
b411b363
PR
879 }
880
881 if (s) {
7da35862
PR
882 if (!sock.socket) {
883 sock.socket = s;
884 send_first_packet(tconn, &sock, P_INITIAL_DATA);
885 } else if (!msock.socket) {
886 msock.socket = s;
887 send_first_packet(tconn, &msock, P_INITIAL_META);
b411b363 888 } else {
81fa2e67 889 conn_err(tconn, "Logic error in conn_connect()\n");
b411b363
PR
890 goto out_release_sockets;
891 }
892 }
893
7da35862
PR
894 if (sock.socket && msock.socket) {
895 rcu_read_lock();
896 nc = rcu_dereference(tconn->net_conf);
897 timeout = nc->ping_timeo * HZ / 10;
898 rcu_read_unlock();
899 schedule_timeout_interruptible(timeout);
900 ok = drbd_socket_okay(&sock.socket);
901 ok = drbd_socket_okay(&msock.socket) && ok;
b411b363
PR
902 if (ok)
903 break;
904 }
905
906retry:
907599e0 907 s = drbd_wait_for_connect(tconn);
b411b363 908 if (s) {
9f5bdc33 909 try = receive_first_packet(tconn, s);
7da35862
PR
910 drbd_socket_okay(&sock.socket);
911 drbd_socket_okay(&msock.socket);
b411b363 912 switch (try) {
e5d6f33a 913 case P_INITIAL_DATA:
7da35862 914 if (sock.socket) {
907599e0 915 conn_warn(tconn, "initial packet S crossed\n");
7da35862 916 sock_release(sock.socket);
b411b363 917 }
7da35862 918 sock.socket = s;
b411b363 919 break;
e5d6f33a 920 case P_INITIAL_META:
7da35862 921 if (msock.socket) {
907599e0 922 conn_warn(tconn, "initial packet M crossed\n");
7da35862 923 sock_release(msock.socket);
b411b363 924 }
7da35862 925 msock.socket = s;
907599e0 926 set_bit(DISCARD_CONCURRENT, &tconn->flags);
b411b363
PR
927 break;
928 default:
907599e0 929 conn_warn(tconn, "Error receiving initial packet\n");
b411b363
PR
930 sock_release(s);
931 if (random32() & 1)
932 goto retry;
933 }
934 }
935
bbeb641c 936 if (tconn->cstate <= C_DISCONNECTING)
b411b363
PR
937 goto out_release_sockets;
938 if (signal_pending(current)) {
939 flush_signals(current);
940 smp_rmb();
907599e0 941 if (get_t_state(&tconn->receiver) == EXITING)
b411b363
PR
942 goto out_release_sockets;
943 }
944
7da35862
PR
945 if (sock.socket && &msock.socket) {
946 ok = drbd_socket_okay(&sock.socket);
947 ok = drbd_socket_okay(&msock.socket) && ok;
b411b363
PR
948 if (ok)
949 break;
950 }
951 } while (1);
952
7da35862
PR
953 sock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */
954 msock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */
b411b363 955
7da35862
PR
956 sock.socket->sk->sk_allocation = GFP_NOIO;
957 msock.socket->sk->sk_allocation = GFP_NOIO;
b411b363 958
7da35862
PR
959 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
960 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
b411b363 961
b411b363 962 /* NOT YET ...
7da35862
PR
963 * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
964 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
6038178e 965 * first set it to the P_CONNECTION_FEATURES timeout,
b411b363 966 * which we set to 4x the configured ping_timeout. */
44ed167d
PR
967 rcu_read_lock();
968 nc = rcu_dereference(tconn->net_conf);
969
7da35862
PR
970 sock.socket->sk->sk_sndtimeo =
971 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
44ed167d 972
7da35862 973 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
44ed167d 974 timeout = nc->timeout * HZ / 10;
08b165ba 975 discard_my_data = nc->discard_my_data;
44ed167d 976 rcu_read_unlock();
b411b363 977
7da35862 978 msock.socket->sk->sk_sndtimeo = timeout;
b411b363
PR
979
980 /* we don't want delays.
25985edc 981 * we use TCP_CORK where appropriate, though */
7da35862
PR
982 drbd_tcp_nodelay(sock.socket);
983 drbd_tcp_nodelay(msock.socket);
b411b363 984
7da35862
PR
985 tconn->data.socket = sock.socket;
986 tconn->meta.socket = msock.socket;
907599e0 987 tconn->last_received = jiffies;
b411b363 988
6038178e 989 h = drbd_do_features(tconn);
b411b363
PR
990 if (h <= 0)
991 return h;
992
907599e0 993 if (tconn->cram_hmac_tfm) {
b411b363 994 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
907599e0 995 switch (drbd_do_auth(tconn)) {
b10d96cb 996 case -1:
907599e0 997 conn_err(tconn, "Authentication of peer failed\n");
b411b363 998 return -1;
b10d96cb 999 case 0:
907599e0 1000 conn_err(tconn, "Authentication of peer failed, trying again.\n");
b10d96cb 1001 return 0;
b411b363
PR
1002 }
1003 }
1004
7da35862
PR
1005 tconn->data.socket->sk->sk_sndtimeo = timeout;
1006 tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
b411b363 1007
387eb308 1008 if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
7e2455c1 1009 return -1;
b411b363 1010
c141ebda
PR
1011 rcu_read_lock();
1012 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1013 kref_get(&mdev->kref);
1014 rcu_read_unlock();
08b165ba
PR
1015
1016 if (discard_my_data)
1017 set_bit(DISCARD_MY_DATA, &mdev->flags);
1018 else
1019 clear_bit(DISCARD_MY_DATA, &mdev->flags);
1020
c141ebda
PR
1021 drbd_connected(mdev);
1022 kref_put(&mdev->kref, &drbd_minor_destroy);
1023 rcu_read_lock();
1024 }
1025 rcu_read_unlock();
1026
823bd832
PR
1027 if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
1028 return 0;
1029
1030 drbd_thread_start(&tconn->asender);
1031
08b165ba
PR
1032 mutex_lock(&tconn->conf_update);
1033 /* The discard_my_data flag is a single-shot modifier to the next
1034 * connection attempt, the handshake of which is now well underway.
1035 * No need for rcu style copying of the whole struct
1036 * just to clear a single value. */
1037 tconn->net_conf->discard_my_data = 0;
1038 mutex_unlock(&tconn->conf_update);
1039
d3fcb490 1040 return h;
b411b363
PR
1041
1042out_release_sockets:
7da35862
PR
1043 if (sock.socket)
1044 sock_release(sock.socket);
1045 if (msock.socket)
1046 sock_release(msock.socket);
b411b363
PR
1047 return -1;
1048}
1049
e658983a 1050static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi)
b411b363 1051{
e658983a
AG
1052 unsigned int header_size = drbd_header_size(tconn);
1053
0c8e36d9
AG
1054 if (header_size == sizeof(struct p_header100) &&
1055 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1056 struct p_header100 *h = header;
1057 if (h->pad != 0) {
1058 conn_err(tconn, "Header padding is not zero\n");
1059 return -EINVAL;
1060 }
1061 pi->vnr = be16_to_cpu(h->volume);
1062 pi->cmd = be16_to_cpu(h->command);
1063 pi->size = be32_to_cpu(h->length);
1064 } else if (header_size == sizeof(struct p_header95) &&
1065 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
e658983a 1066 struct p_header95 *h = header;
e658983a 1067 pi->cmd = be16_to_cpu(h->command);
b55d84ba
AG
1068 pi->size = be32_to_cpu(h->length);
1069 pi->vnr = 0;
e658983a
AG
1070 } else if (header_size == sizeof(struct p_header80) &&
1071 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1072 struct p_header80 *h = header;
1073 pi->cmd = be16_to_cpu(h->command);
1074 pi->size = be16_to_cpu(h->length);
77351055 1075 pi->vnr = 0;
02918be2 1076 } else {
e658983a
AG
1077 conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n",
1078 be32_to_cpu(*(__be32 *)header),
1079 tconn->agreed_pro_version);
8172f3e9 1080 return -EINVAL;
b411b363 1081 }
e658983a 1082 pi->data = header + header_size;
8172f3e9 1083 return 0;
257d0af6
PR
1084}
1085
9ba7aa00 1086static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
257d0af6 1087{
e658983a 1088 void *buffer = tconn->data.rbuf;
69bc7bc3 1089 int err;
257d0af6 1090
e658983a 1091 err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn));
a5c31904 1092 if (err)
69bc7bc3 1093 return err;
257d0af6 1094
e658983a 1095 err = decode_header(tconn, buffer, pi);
9ba7aa00 1096 tconn->last_received = jiffies;
b411b363 1097
69bc7bc3 1098 return err;
b411b363
PR
1099}
1100
1101static void drbd_flush(struct drbd_tconn *tconn)
1102{
1103 int rv;
1104 struct drbd_conf *mdev;
1105 int vnr;
1106
1107 if (tconn->write_ordering >= WO_bdev_flush) {
1108 rcu_read_lock();
1109 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1110 if (!get_ldev(mdev))
1111 continue;
1112 kref_get(&mdev->kref);
1113 rcu_read_unlock();
1114
1115 rv = blkdev_issue_flush(mdev->ldev->backing_bdev,
1116 GFP_NOIO, NULL);
1117 if (rv) {
1118 dev_info(DEV, "local disk flush failed with status %d\n", rv);
1119 /* would rather check on EOPNOTSUPP, but that is not reliable.
1120 * don't try again for ANY return value != 0
1121 * if (rv == -EOPNOTSUPP) */
1122 drbd_bump_write_ordering(tconn, WO_drain_io);
1123 }
1124 put_ldev(mdev);
1125 kref_put(&mdev->kref, &drbd_minor_destroy);
1126
1127 rcu_read_lock();
1128 if (rv)
1129 break;
1130 }
1131 rcu_read_unlock();
1132 }
1133}
1134
1135/**
1136 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
1137 * @mdev: DRBD device.
1138 * @epoch: Epoch object.
1139 * @ev: Epoch event.
1140 */
1e9dd291 1141static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn,
b411b363
PR
1142 struct drbd_epoch *epoch,
1143 enum epoch_event ev)
1144{
2451fc3b 1145 int epoch_size;
b411b363 1146 struct drbd_epoch *next_epoch;
b411b363
PR
1147 enum finish_epoch rv = FE_STILL_LIVE;
1148
12038a3a 1149 spin_lock(&tconn->epoch_lock);
b411b363
PR
1150 do {
1151 next_epoch = NULL;
b411b363
PR
1152
1153 epoch_size = atomic_read(&epoch->epoch_size);
1154
1155 switch (ev & ~EV_CLEANUP) {
1156 case EV_PUT:
1157 atomic_dec(&epoch->active);
1158 break;
1159 case EV_GOT_BARRIER_NR:
1160 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
b411b363
PR
1161 break;
1162 case EV_BECAME_LAST:
1163 /* nothing to do*/
1164 break;
1165 }
1166
b411b363
PR
1167 if (epoch_size != 0 &&
1168 atomic_read(&epoch->active) == 0 &&
85d73513 1169 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
b411b363 1170 if (!(ev & EV_CLEANUP)) {
12038a3a 1171 spin_unlock(&tconn->epoch_lock);
9ed57dcb 1172 drbd_send_b_ack(epoch->tconn, epoch->barrier_nr, epoch_size);
12038a3a 1173 spin_lock(&tconn->epoch_lock);
b411b363 1174 }
9ed57dcb
LE
1175#if 0
1176 /* FIXME: dec unacked on connection, once we have
1177 * something to count pending connection packets in. */
85d73513 1178 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
9ed57dcb
LE
1179 dec_unacked(epoch->tconn);
1180#endif
b411b363 1181
12038a3a 1182 if (tconn->current_epoch != epoch) {
b411b363
PR
1183 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1184 list_del(&epoch->list);
1185 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
12038a3a 1186 tconn->epochs--;
b411b363
PR
1187 kfree(epoch);
1188
1189 if (rv == FE_STILL_LIVE)
1190 rv = FE_DESTROYED;
1191 } else {
1192 epoch->flags = 0;
1193 atomic_set(&epoch->epoch_size, 0);
698f9315 1194 /* atomic_set(&epoch->active, 0); is already zero */
b411b363
PR
1195 if (rv == FE_STILL_LIVE)
1196 rv = FE_RECYCLED;
1197 }
1198 }
1199
1200 if (!next_epoch)
1201 break;
1202
1203 epoch = next_epoch;
1204 } while (1);
1205
12038a3a 1206 spin_unlock(&tconn->epoch_lock);
b411b363 1207
b411b363
PR
1208 return rv;
1209}
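/* Explanatory note (added): FE_DESTROYED means the epoch was unlinked from the
 * list and freed, FE_RECYCLED means the current epoch was merely reset for
 * reuse, and FE_STILL_LIVE means it could not be finished yet (e.g. writes are
 * still active or no barrier number has been received). */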
1210
1211/**
1212 * drbd_bump_write_ordering() - Fall back to another write ordering method
1213 * @tconn: DRBD connection.
1214 * @wo: Write ordering method to try.
1215 */
1216void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo)
1217{
1218 struct disk_conf *dc;
1219 struct drbd_conf *mdev;
1220 enum write_ordering_e pwo;
1221 int vnr;
1222 static char *write_ordering_str[] = {
1223 [WO_none] = "none",
1224 [WO_drain_io] = "drain",
1225 [WO_bdev_flush] = "flush",
1226 };
1227
1228 pwo = tconn->write_ordering;
1229 wo = min(pwo, wo);
1230 rcu_read_lock();
1231 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1232 if (!get_ldev_if_state(mdev, D_ATTACHING))
1233 continue;
1234 dc = rcu_dereference(mdev->ldev->disk_conf);
1235
1236 if (wo == WO_bdev_flush && !dc->disk_flushes)
1237 wo = WO_drain_io;
1238 if (wo == WO_drain_io && !dc->disk_drain)
1239 wo = WO_none;
1240 put_ldev(mdev);
1241 }
1242 rcu_read_unlock();
1243 tconn->write_ordering = wo;
1244 if (pwo != tconn->write_ordering || wo == WO_bdev_flush)
1245 conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]);
1246}
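/* Explanatory note (added): the effective method can only be downgraded, never
 * upgraded (wo = min(pwo, wo)): flush -> drain -> none.  A volume configured
 * with disk-flushes=no falls back to drain, and one with disk-drain=no falls
 * back to none. */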
1247
45bb912b 1248/**
fbe29dec 1249 * drbd_submit_peer_request()
45bb912b 1250 * @mdev: DRBD device.
db830c46 1251 * @peer_req: peer request
45bb912b 1252 * @rw: flag field, see bio->bi_rw
10f6d992
LE
1253 *
1254 * May spread the pages to multiple bios,
1255 * depending on bio_add_page restrictions.
1256 *
1257 * Returns 0 if all bios have been submitted,
1258 * -ENOMEM if we could not allocate enough bios,
1259 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1260 * single page to an empty bio (which should never happen and likely indicates
1261 * that the lower level IO stack is in some way broken). This has been observed
1262 * on certain Xen deployments.
45bb912b
LE
1263 */
1264/* TODO allocate from our own bio_set. */
fbe29dec
AG
1265int drbd_submit_peer_request(struct drbd_conf *mdev,
1266 struct drbd_peer_request *peer_req,
1267 const unsigned rw, const int fault_type)
45bb912b
LE
1268{
1269 struct bio *bios = NULL;
1270 struct bio *bio;
db830c46
AG
1271 struct page *page = peer_req->pages;
1272 sector_t sector = peer_req->i.sector;
1273 unsigned ds = peer_req->i.size;
45bb912b
LE
1274 unsigned n_bios = 0;
1275 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
10f6d992 1276 int err = -ENOMEM;
45bb912b
LE
1277
1278 /* In most cases, we will only need one bio. But in case the lower
1279 * level restrictions happen to be different at this offset on this
1280 * side than those of the sending peer, we may need to submit the
da4a75d2
LE
1281 * request in more than one bio.
1282 *
1283 * Plain bio_alloc is good enough here, this is no DRBD internally
1284 * generated bio, but a bio allocated on behalf of the peer.
1285 */
45bb912b
LE
1286next_bio:
1287 bio = bio_alloc(GFP_NOIO, nr_pages);
1288 if (!bio) {
1289 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1290 goto fail;
1291 }
db830c46 1292 /* > peer_req->i.sector, unless this is the first bio */
45bb912b
LE
1293 bio->bi_sector = sector;
1294 bio->bi_bdev = mdev->ldev->backing_bdev;
45bb912b 1295 bio->bi_rw = rw;
db830c46 1296 bio->bi_private = peer_req;
fcefa62e 1297 bio->bi_end_io = drbd_peer_request_endio;
45bb912b
LE
1298
1299 bio->bi_next = bios;
1300 bios = bio;
1301 ++n_bios;
1302
1303 page_chain_for_each(page) {
1304 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1305 if (!bio_add_page(bio, page, len, 0)) {
10f6d992
LE
1306 /* A single page must always be possible!
1307 * But in case it fails anyways,
1308 * we deal with it, and complain (below). */
1309 if (bio->bi_vcnt == 0) {
1310 dev_err(DEV,
1311 "bio_add_page failed for len=%u, "
1312 "bi_vcnt=0 (bi_sector=%llu)\n",
1313 len, (unsigned long long)bio->bi_sector);
1314 err = -ENOSPC;
1315 goto fail;
1316 }
45bb912b
LE
1317 goto next_bio;
1318 }
1319 ds -= len;
1320 sector += len >> 9;
1321 --nr_pages;
1322 }
1323 D_ASSERT(page == NULL);
1324 D_ASSERT(ds == 0);
1325
db830c46 1326 atomic_set(&peer_req->pending_bios, n_bios);
45bb912b
LE
1327 do {
1328 bio = bios;
1329 bios = bios->bi_next;
1330 bio->bi_next = NULL;
1331
45bb912b 1332 drbd_generic_make_request(mdev, fault_type, bio);
45bb912b 1333 } while (bios);
45bb912b
LE
1334 return 0;
1335
1336fail:
1337 while (bios) {
1338 bio = bios;
1339 bios = bios->bi_next;
1340 bio_put(bio);
1341 }
10f6d992 1342 return err;
45bb912b
LE
1343}
1344
53840641 1345static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
db830c46 1346 struct drbd_peer_request *peer_req)
53840641 1347{
db830c46 1348 struct drbd_interval *i = &peer_req->i;
53840641
AG
1349
1350 drbd_remove_interval(&mdev->write_requests, i);
1351 drbd_clear_interval(i);
1352
6c852bec 1353 /* Wake up any processes waiting for this peer request to complete. */
53840641
AG
1354 if (i->waiting)
1355 wake_up(&mdev->misc_wait);
1356}
1357
77fede51
PR
1358void conn_wait_active_ee_empty(struct drbd_tconn *tconn)
1359{
1360 struct drbd_conf *mdev;
1361 int vnr;
1362
1363 rcu_read_lock();
1364 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1365 kref_get(&mdev->kref);
1366 rcu_read_unlock();
1367 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1368 kref_put(&mdev->kref, &drbd_minor_destroy);
1369 rcu_read_lock();
1370 }
1371 rcu_read_unlock();
1372}
1373
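/* Explanatory note (added): how a received P_BARRIER is honored depends on the
 * current write ordering method: with WO_none the epoch is merely switched,
 * while WO_bdev_flush and WO_drain_io first wait for all active peer requests
 * to complete and then call drbd_flush() (a no-op for plain drain) before a
 * new epoch is started. */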
1374static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
1375{
1376 int rv;
1377 struct p_barrier *p = pi->data;
1378 struct drbd_epoch *epoch;
1379
1380 /* FIXME these are unacked on connection,
1381 * not a specific (peer)device.
1382 */
1383 tconn->current_epoch->barrier_nr = p->barrier;
1384 tconn->current_epoch->tconn = tconn;
1385 rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR);
1386
1387 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1388 * the activity log, which means it would not be resynced in case the
1389 * R_PRIMARY crashes now.
1390 * Therefore we must send the barrier_ack after the barrier request was
1391 * completed. */
1392 switch (tconn->write_ordering) {
1393 case WO_none:
1394 if (rv == FE_RECYCLED)
1395 return 0;
1396
1397 /* receiver context, in the writeout path of the other node.
1398 * avoid potential distributed deadlock */
1399 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1400 if (epoch)
1401 break;
1402 else
1403 conn_warn(tconn, "Allocation of an epoch failed, slowing down\n");
1404 /* Fall through */
1405
1406 case WO_bdev_flush:
1407 case WO_drain_io:
1408 conn_wait_active_ee_empty(tconn);
1409 drbd_flush(tconn);
1410
1411 if (atomic_read(&tconn->current_epoch->epoch_size)) {
1412 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1413 if (epoch)
1414 break;
1415 }
1416
1417 return 0;
1418 default:
1419 conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering);
1420 return -EIO;
1421 }
1422
1423 epoch->flags = 0;
1424 atomic_set(&epoch->epoch_size, 0);
1425 atomic_set(&epoch->active, 0);
1426
1427 spin_lock(&tconn->epoch_lock);
1428 if (atomic_read(&tconn->current_epoch->epoch_size)) {
1429 list_add(&epoch->list, &tconn->current_epoch->list);
1430 tconn->current_epoch = epoch;
1431 tconn->epochs++;
1432 } else {
1433 /* The current_epoch got recycled while we allocated this one... */
1434 kfree(epoch);
1435 }
1436 spin_unlock(&tconn->epoch_lock);
1437
1438 return 0;
1439}
1440
1441/* used from receive_RSDataReply (recv_resync_read)
1442 * and from receive_Data */
f6ffca9f
AG
1443static struct drbd_peer_request *
1444read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1445 int data_size) __must_hold(local)
b411b363 1446{
6666032a 1447 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
db830c46 1448 struct drbd_peer_request *peer_req;
b411b363 1449 struct page *page;
a5c31904 1450 int dgs, ds, err;
a0638456
PR
1451 void *dig_in = mdev->tconn->int_dig_in;
1452 void *dig_vv = mdev->tconn->int_dig_vv;
6b4388ac 1453 unsigned long *data;
b411b363 1454
88104ca4
AG
1455 dgs = 0;
1456 if (mdev->tconn->peer_integrity_tfm) {
1457 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
9f5bdc33
AG
1458 /*
1459 * FIXME: Receive the incoming digest into the receive buffer
1460 * here, together with its struct p_data?
1461 */
a5c31904
AG
1462 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1463 if (err)
b411b363 1464 return NULL;
88104ca4 1465 data_size -= dgs;
b411b363
PR
1466 }
1467
841ce241
AG
1468 if (!expect(data_size != 0))
1469 return NULL;
1470 if (!expect(IS_ALIGNED(data_size, 512)))
1471 return NULL;
1472 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1473 return NULL;
b411b363 1474
6666032a
LE
1475 /* even though we trust out peer,
1476 * we sometimes have to double check. */
1477 if (sector + (data_size>>9) > capacity) {
fdda6544
LE
1478 dev_err(DEV, "request from peer beyond end of local disk: "
1479 "capacity: %llus < sector: %llus + size: %u\n",
6666032a
LE
1480 (unsigned long long)capacity,
1481 (unsigned long long)sector, data_size);
1482 return NULL;
1483 }
1484
b411b363
PR
1485 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1486 * "criss-cross" setup, that might cause write-out on some other DRBD,
1487 * which in turn might block on the other node at this very place. */
0db55363 1488 peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO);
db830c46 1489 if (!peer_req)
b411b363 1490 return NULL;
45bb912b 1491
b411b363 1492 ds = data_size;
db830c46 1493 page = peer_req->pages;
45bb912b
LE
1494 page_chain_for_each(page) {
1495 unsigned len = min_t(int, ds, PAGE_SIZE);
6b4388ac 1496 data = kmap(page);
a5c31904 1497 err = drbd_recv_all_warn(mdev->tconn, data, len);
0cf9d27e 1498 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
6b4388ac
PR
1499 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1500 data[0] = data[0] ^ (unsigned long)-1;
1501 }
b411b363 1502 kunmap(page);
a5c31904 1503 if (err) {
3967deb1 1504 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
1505 return NULL;
1506 }
a5c31904 1507 ds -= len;
b411b363
PR
1508 }
1509
1510 if (dgs) {
5b614abe 1511 drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv);
b411b363 1512 if (memcmp(dig_in, dig_vv, dgs)) {
470be44a
LE
1513 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1514 (unsigned long long)sector, data_size);
3967deb1 1515 drbd_free_peer_req(mdev, peer_req);
b411b363
PR
1516 return NULL;
1517 }
1518 }
1519 mdev->recv_cnt += data_size>>9;
db830c46 1520 return peer_req;
b411b363
PR
1521}
1522
1523/* drbd_drain_block() just takes a data block
1524 * out of the socket input buffer, and discards it.
1525 */
1526static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1527{
1528 struct page *page;
a5c31904 1529 int err = 0;
b411b363
PR
1530 void *data;
1531
c3470cde 1532 if (!data_size)
fc5be839 1533 return 0;
c3470cde 1534
c37c8ecf 1535 page = drbd_alloc_pages(mdev, 1, 1);
b411b363
PR
1536
1537 data = kmap(page);
1538 while (data_size) {
fc5be839
AG
1539 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1540
a5c31904
AG
1541 err = drbd_recv_all_warn(mdev->tconn, data, len);
1542 if (err)
b411b363 1543 break;
a5c31904 1544 data_size -= len;
b411b363
PR
1545 }
1546 kunmap(page);
5cc287e0 1547 drbd_free_pages(mdev, page, 0);
fc5be839 1548 return err;
b411b363
PR
1549}
1550
1551static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1552 sector_t sector, int data_size)
1553{
1554 struct bio_vec *bvec;
1555 struct bio *bio;
a5c31904 1556 int dgs, err, i, expect;
a0638456
PR
1557 void *dig_in = mdev->tconn->int_dig_in;
1558 void *dig_vv = mdev->tconn->int_dig_vv;
b411b363 1559
88104ca4
AG
1560 dgs = 0;
1561 if (mdev->tconn->peer_integrity_tfm) {
1562 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
a5c31904
AG
1563 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1564 if (err)
1565 return err;
88104ca4 1566 data_size -= dgs;
b411b363
PR
1567 }
1568
b411b363
PR
1569 /* optimistically update recv_cnt. if receiving fails below,
1570 * we disconnect anyways, and counters will be reset. */
1571 mdev->recv_cnt += data_size>>9;
1572
1573 bio = req->master_bio;
1574 D_ASSERT(sector == bio->bi_sector);
1575
1576 bio_for_each_segment(bvec, bio, i) {
a5c31904 1577 void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
b411b363 1578 expect = min_t(int, data_size, bvec->bv_len);
a5c31904 1579 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
b411b363 1580 kunmap(bvec->bv_page);
a5c31904
AG
1581 if (err)
1582 return err;
1583 data_size -= expect;
b411b363
PR
1584 }
1585
1586 if (dgs) {
5b614abe 1587 drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv);
b411b363
PR
1588 if (memcmp(dig_in, dig_vv, dgs)) {
1589 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
28284cef 1590 return -EINVAL;
b411b363
PR
1591 }
1592 }
1593
1594 D_ASSERT(data_size == 0);
28284cef 1595 return 0;
b411b363
PR
1596}
1597
a990be46
AG
1598/*
1599 * e_end_resync_block() is called in asender context via
1600 * drbd_finish_peer_reqs().
1601 */
99920dc5 1602static int e_end_resync_block(struct drbd_work *w, int unused)
b411b363 1603{
8050e6d0
AG
1604 struct drbd_peer_request *peer_req =
1605 container_of(w, struct drbd_peer_request, w);
00d56944 1606 struct drbd_conf *mdev = w->mdev;
db830c46 1607 sector_t sector = peer_req->i.sector;
99920dc5 1608 int err;
b411b363 1609
db830c46 1610 D_ASSERT(drbd_interval_empty(&peer_req->i));
b411b363 1611
db830c46
AG
1612 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1613 drbd_set_in_sync(mdev, sector, peer_req->i.size);
99920dc5 1614 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
b411b363
PR
1615 } else {
1616 /* Record failure to sync */
db830c46 1617 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
b411b363 1618
99920dc5 1619 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
b411b363
PR
1620 }
1621 dec_unacked(mdev);
1622
99920dc5 1623 return err;
b411b363
PR
1624}
1625
1626static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1627{
db830c46 1628 struct drbd_peer_request *peer_req;
b411b363 1629
db830c46
AG
1630 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1631 if (!peer_req)
45bb912b 1632 goto fail;
b411b363
PR
1633
1634 dec_rs_pending(mdev);
1635
b411b363
PR
1636 inc_unacked(mdev);
1637 /* corresponding dec_unacked() in e_end_resync_block()
1638 * respective _drbd_clear_done_ee */
1639
db830c46 1640 peer_req->w.cb = e_end_resync_block;
45bb912b 1641
87eeee41 1642 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 1643 list_add(&peer_req->w.list, &mdev->sync_ee);
87eeee41 1644 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 1645
0f0601f4 1646 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
fbe29dec 1647 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
e1c1b0fc 1648 return 0;
b411b363 1649
10f6d992
LE
1650 /* don't care for the reason here */
1651 dev_err(DEV, "submit failed, triggering re-connect\n");
87eeee41 1652 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 1653 list_del(&peer_req->w.list);
87eeee41 1654 spin_unlock_irq(&mdev->tconn->req_lock);
22cc37a9 1655
3967deb1 1656 drbd_free_peer_req(mdev, peer_req);
45bb912b
LE
1657fail:
1658 put_ldev(mdev);
e1c1b0fc 1659 return -EIO;
b411b363
PR
1660}
1661
668eebc6 1662static struct drbd_request *
bc9c5c41
AG
1663find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1664 sector_t sector, bool missing_ok, const char *func)
51624585 1665{
51624585
AG
1666 struct drbd_request *req;
1667
bc9c5c41
AG
1668 /* Request object according to our peer */
1669 req = (struct drbd_request *)(unsigned long)id;
5e472264 1670 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
668eebc6 1671 return req;
c3afd8f5 1672 if (!missing_ok) {
5af172ed 1673 dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
c3afd8f5
AG
1674 (unsigned long)id, (unsigned long long)sector);
1675 }
51624585
AG
1676 return NULL;
1677}
1678
4a76b161 1679static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 1680{
4a76b161 1681 struct drbd_conf *mdev;
b411b363
PR
1682 struct drbd_request *req;
1683 sector_t sector;
82bc0194 1684 int err;
e658983a 1685 struct p_data *p = pi->data;
4a76b161
AG
1686
1687 mdev = vnr_to_mdev(tconn, pi->vnr);
1688 if (!mdev)
1689 return -EIO;
b411b363
PR
1690
1691 sector = be64_to_cpu(p->sector);
1692
87eeee41 1693 spin_lock_irq(&mdev->tconn->req_lock);
bc9c5c41 1694 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
87eeee41 1695 spin_unlock_irq(&mdev->tconn->req_lock);
c3afd8f5 1696 if (unlikely(!req))
82bc0194 1697 return -EIO;
b411b363 1698
24c4830c 1699 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
b411b363
PR
1700 * special casing it there for the various failure cases.
1701 * still no race with drbd_fail_pending_reads */
e2857216 1702 err = recv_dless_read(mdev, req, sector, pi->size);
82bc0194 1703 if (!err)
8554df1c 1704 req_mod(req, DATA_RECEIVED);
b411b363
PR
1705 /* else: nothing. handled from drbd_disconnect...
1706 * I don't think we may complete this just yet
1707 * in case we are "on-disconnect: freeze" */
1708
82bc0194 1709 return err;
b411b363
PR
1710}
1711
4a76b161 1712static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 1713{
4a76b161 1714 struct drbd_conf *mdev;
b411b363 1715 sector_t sector;
82bc0194 1716 int err;
e658983a 1717 struct p_data *p = pi->data;
4a76b161
AG
1718
1719 mdev = vnr_to_mdev(tconn, pi->vnr);
1720 if (!mdev)
1721 return -EIO;
b411b363
PR
1722
1723 sector = be64_to_cpu(p->sector);
1724 D_ASSERT(p->block_id == ID_SYNCER);
1725
1726 if (get_ldev(mdev)) {
1727 /* data is submitted to disk within recv_resync_read.
1728 * corresponding put_ldev done below on error,
fcefa62e 1729 * or in drbd_peer_request_endio. */
e2857216 1730 err = recv_resync_read(mdev, sector, pi->size);
b411b363
PR
1731 } else {
1732 if (__ratelimit(&drbd_ratelimit_state))
1733 dev_err(DEV, "Can not write resync data to local disk.\n");
1734
e2857216 1735 err = drbd_drain_block(mdev, pi->size);
b411b363 1736
e2857216 1737 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
b411b363
PR
1738 }
1739
e2857216 1740 atomic_add(pi->size >> 9, &mdev->rs_sect_in);
778f271d 1741
82bc0194 1742 return err;
b411b363
PR
1743}
1744
7be8da07
AG
1745static void restart_conflicting_writes(struct drbd_conf *mdev,
1746 sector_t sector, int size)
1747{
1748 struct drbd_interval *i;
1749 struct drbd_request *req;
1750
1751 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1752 if (!i->local)
1753 continue;
1754 req = container_of(i, struct drbd_request, i);
1755 if (req->rq_state & RQ_LOCAL_PENDING ||
1756 !(req->rq_state & RQ_POSTPONED))
1757 continue;
2312f0b3
LE
1758 /* as it is RQ_POSTPONED, this will cause it to
1759 * be queued on the retry workqueue. */
1760 __req_mod(req, DISCARD_WRITE, NULL);
7be8da07
AG
1761 }
1762}
1763
a990be46
AG
1764/*
1765 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
b411b363 1766 */
99920dc5 1767static int e_end_block(struct drbd_work *w, int cancel)
b411b363 1768{
8050e6d0
AG
1769 struct drbd_peer_request *peer_req =
1770 container_of(w, struct drbd_peer_request, w);
00d56944 1771 struct drbd_conf *mdev = w->mdev;
db830c46 1772 sector_t sector = peer_req->i.sector;
99920dc5 1773 int err = 0, pcmd;
b411b363 1774
303d1448 1775 if (peer_req->flags & EE_SEND_WRITE_ACK) {
db830c46 1776 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
b411b363
PR
1777 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1778 mdev->state.conn <= C_PAUSED_SYNC_T &&
db830c46 1779 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
b411b363 1780 P_RS_WRITE_ACK : P_WRITE_ACK;
99920dc5 1781 err = drbd_send_ack(mdev, pcmd, peer_req);
b411b363 1782 if (pcmd == P_RS_WRITE_ACK)
db830c46 1783 drbd_set_in_sync(mdev, sector, peer_req->i.size);
b411b363 1784 } else {
99920dc5 1785 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
b411b363
PR
1786 /* we expect it to be marked out of sync anyways...
1787 * maybe assert this? */
1788 }
1789 dec_unacked(mdev);
1790 }
1791 /* we delete from the conflict detection hash _after_ we sent out the
1792 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
302bdeae 1793 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
87eeee41 1794 spin_lock_irq(&mdev->tconn->req_lock);
db830c46
AG
1795 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1796 drbd_remove_epoch_entry_interval(mdev, peer_req);
7be8da07
AG
1797 if (peer_req->flags & EE_RESTART_REQUESTS)
1798 restart_conflicting_writes(mdev, sector, peer_req->i.size);
87eeee41 1799 spin_unlock_irq(&mdev->tconn->req_lock);
bb3bfe96 1800 } else
db830c46 1801 D_ASSERT(drbd_interval_empty(&peer_req->i));
b411b363 1802
1e9dd291 1803 drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
b411b363 1804
99920dc5 1805 return err;
b411b363
PR
1806}
1807
7be8da07 1808static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
b411b363 1809{
7be8da07 1810 struct drbd_conf *mdev = w->mdev;
8050e6d0
AG
1811 struct drbd_peer_request *peer_req =
1812 container_of(w, struct drbd_peer_request, w);
99920dc5 1813 int err;
b411b363 1814
99920dc5 1815 err = drbd_send_ack(mdev, ack, peer_req);
b411b363
PR
1816 dec_unacked(mdev);
1817
99920dc5 1818 return err;
b411b363
PR
1819}
1820
99920dc5 1821static int e_send_discard_write(struct drbd_work *w, int unused)
7be8da07
AG
1822{
1823 return e_send_ack(w, P_DISCARD_WRITE);
1824}
1825
99920dc5 1826static int e_send_retry_write(struct drbd_work *w, int unused)
7be8da07
AG
1827{
1828 struct drbd_tconn *tconn = w->mdev->tconn;
1829
1830 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1831 P_RETRY_WRITE : P_DISCARD_WRITE);
1832}
1833
3e394da1
AG
1834static bool seq_greater(u32 a, u32 b)
1835{
1836 /*
1837 * We assume 32-bit wrap-around here.
1838 * For 24-bit wrap-around, we would have to shift:
1839 * a <<= 8; b <<= 8;
1840 */
1841 return (s32)a - (s32)b > 0;
1842}
1843
1844static u32 seq_max(u32 a, u32 b)
1845{
1846 return seq_greater(a, b) ? a : b;
1847}
1848
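/* Illustration of the wrap-around comparison above (hypothetical values):
 *
 *	seq_greater(5, 0xfffffffd)  ->  (s32)5 - (s32)0xfffffffd == 5 - (-3) == 8 > 0  ->  true
 *	seq_greater(0xfffffffd, 5)  ->  -3 - 5 == -8                                   ->  false
 *	seq_max(5, 0xfffffffd)      ->  5
 *
 * i.e. a sequence number that has just wrapped past 0 still compares as
 * "newer" than one shortly before the wrap, as long as the two numbers are
 * less than 2^31 apart. */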
7be8da07
AG
1849static bool need_peer_seq(struct drbd_conf *mdev)
1850{
1851 struct drbd_tconn *tconn = mdev->tconn;
302bdeae 1852 int tp;
7be8da07
AG
1853
1854 /*
1855 * We only need to keep track of the last packet_seq number of our peer
1856 * if we are in dual-primary mode and we have the discard flag set; see
1857 * handle_write_conflicts().
1858 */
302bdeae
PR
1859
1860 rcu_read_lock();
1861 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1862 rcu_read_unlock();
1863
1864 return tp && test_bit(DISCARD_CONCURRENT, &tconn->flags);
7be8da07
AG
1865}
1866
43ae077d 1867static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
3e394da1 1868{
3c13b680 1869 unsigned int newest_peer_seq;
3e394da1 1870
7be8da07
AG
1871 if (need_peer_seq(mdev)) {
1872 spin_lock(&mdev->peer_seq_lock);
3c13b680
LE
1873 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1874 mdev->peer_seq = newest_peer_seq;
7be8da07 1875 spin_unlock(&mdev->peer_seq_lock);
3c13b680
LE
1876 /* wake up only if we actually changed mdev->peer_seq */
1877 if (peer_seq == newest_peer_seq)
7be8da07
AG
1878 wake_up(&mdev->seq_wait);
1879 }
3e394da1
AG
1880}
1881
d93f6302
LE
1882static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1883{
1884 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1885}
1886
1887/* maybe change sync_ee into interval trees as well? */
1888static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
1889{
1890 struct drbd_peer_request *rs_req;
1891 bool rv = 0;
1892
1893 spin_lock_irq(&mdev->tconn->req_lock);
1894 list_for_each_entry(rs_req, &mdev->sync_ee, w.list) {
1895 if (overlaps(peer_req->i.sector, peer_req->i.size,
1896 rs_req->i.sector, rs_req->i.size)) {
1897 rv = 1;
1898 break;
1899 }
1900 }
1901 spin_unlock_irq(&mdev->tconn->req_lock);
1902
1903 if (rv)
1904 dev_warn(DEV, "WARN: Avoiding concurrent data/resync write to single sector.\n");
1905
1906 return rv;
1907}
1908
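/* overlaps() works on half-open sector intervals; the lengths are byte
 * counts, hence the >>9.  Hypothetical example:
 *
 *	overlaps(0, 4096, 4, 2048)   // [0,8) vs [4,8) in sectors  ->  true
 *	overlaps(0, 4096, 8, 2048)   // [0,8) vs [8,12) in sectors ->  false
 *
 * so a resync write that merely starts at the sector where an application
 * write ends is not treated as a conflict by the check above. */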
b411b363
PR
1909/* Called from receive_Data.
1910 * Synchronize packets on sock with packets on msock.
1911 *
1912 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1913 * packet traveling on msock, they are still processed in the order they have
1914 * been sent.
1915 *
1916 * Note: we don't care for Ack packets overtaking P_DATA packets.
1917 *
1918 * In case packet_seq is larger than mdev->peer_seq number, there are
1919 * outstanding packets on the msock. We wait for them to arrive.
1920 * In case we are the logically next packet, we update mdev->peer_seq
1921 * ourselves. Correctly handles 32bit wrap around.
1922 *
1923 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1924 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1925 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 1926 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
1927 *
1928 * returns 0 if we may process the packet,
1929 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
7be8da07 1930static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
b411b363
PR
1931{
1932 DEFINE_WAIT(wait);
b411b363 1933 long timeout;
7be8da07
AG
1934 int ret;
1935
1936 if (!need_peer_seq(mdev))
1937 return 0;
1938
b411b363
PR
1939 spin_lock(&mdev->peer_seq_lock);
1940 for (;;) {
7be8da07
AG
1941 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1942 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1943 ret = 0;
b411b363 1944 break;
7be8da07 1945 }
b411b363
PR
1946 if (signal_pending(current)) {
1947 ret = -ERESTARTSYS;
1948 break;
1949 }
7be8da07 1950 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
b411b363 1951 spin_unlock(&mdev->peer_seq_lock);
44ed167d
PR
1952 rcu_read_lock();
1953 timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10;
1954 rcu_read_unlock();
71b1c1eb 1955 timeout = schedule_timeout(timeout);
b411b363 1956 spin_lock(&mdev->peer_seq_lock);
7be8da07 1957 if (!timeout) {
b411b363 1958 ret = -ETIMEDOUT;
71b1c1eb 1959 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
b411b363
PR
1960 break;
1961 }
1962 }
b411b363 1963 spin_unlock(&mdev->peer_seq_lock);
7be8da07 1964 finish_wait(&mdev->seq_wait, &wait);
b411b363
PR
1965 return ret;
1966}
1967
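/* Concrete reading of the wait condition above, with mdev->peer_seq == N
 * (hypothetical values):
 *
 *	peer_seq == N or N+1  ->  seq_greater(peer_seq - 1, N) is false; we are
 *	                          (at most) the logically next packet and may
 *	                          proceed immediately;
 *	peer_seq == N+2       ->  at least one packet with a smaller sequence
 *	                          number is still in flight on the msock, so we
 *	                          sleep until its ack bumps mdev->peer_seq.
 */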
688593c5
LE
1968/* see also bio_flags_to_wire()
1969 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1970 * flags and back. We may replicate to other kernel versions. */
1971static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
76d2e7ec 1972{
688593c5
LE
1973 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1974 (dpf & DP_FUA ? REQ_FUA : 0) |
1975 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1976 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
76d2e7ec
PR
1977}
1978
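/* Usage sketch: receive_Data() below starts with rw = WRITE and ORs the
 * result in, so a peer write sent with (DP_FUA | DP_FLUSH) is resubmitted
 * locally as WRITE | REQ_FUA | REQ_FLUSH, preserving the durability and
 * ordering semantics the peer's submitter asked for:
 *
 *	rw = WRITE;
 *	rw |= wire_flags_to_bio(mdev, be32_to_cpu(p->dp_flags));
 */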
7be8da07
AG
1979static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
1980 unsigned int size)
1981{
1982 struct drbd_interval *i;
1983
1984 repeat:
1985 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1986 struct drbd_request *req;
1987 struct bio_and_error m;
1988
1989 if (!i->local)
1990 continue;
1991 req = container_of(i, struct drbd_request, i);
1992 if (!(req->rq_state & RQ_POSTPONED))
1993 continue;
1994 req->rq_state &= ~RQ_POSTPONED;
1995 __req_mod(req, NEG_ACKED, &m);
1996 spin_unlock_irq(&mdev->tconn->req_lock);
1997 if (m.bio)
1998 complete_master_bio(mdev, &m);
1999 spin_lock_irq(&mdev->tconn->req_lock);
2000 goto repeat;
2001 }
2002}
2003
2004static int handle_write_conflicts(struct drbd_conf *mdev,
2005 struct drbd_peer_request *peer_req)
2006{
2007 struct drbd_tconn *tconn = mdev->tconn;
2008 bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
2009 sector_t sector = peer_req->i.sector;
2010 const unsigned int size = peer_req->i.size;
2011 struct drbd_interval *i;
2012 bool equal;
2013 int err;
2014
2015 /*
2016 * Inserting the peer request into the write_requests tree will prevent
2017 * new conflicting local requests from being added.
2018 */
2019 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
2020
2021 repeat:
2022 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2023 if (i == &peer_req->i)
2024 continue;
2025
2026 if (!i->local) {
2027 /*
2028 * Our peer has sent a conflicting remote request; this
2029 * should not happen in a two-node setup. Wait for the
2030 * earlier peer request to complete.
2031 */
2032 err = drbd_wait_misc(mdev, i);
2033 if (err)
2034 goto out;
2035 goto repeat;
2036 }
2037
2038 equal = i->sector == sector && i->size == size;
2039 if (resolve_conflicts) {
2040 /*
2041 * If the peer request is fully contained within the
2042 * overlapping request, it can be discarded; otherwise,
2043 * it will be retried once all overlapping requests
2044 * have completed.
2045 */
2046 bool discard = i->sector <= sector && i->sector +
2047 (i->size >> 9) >= sector + (size >> 9);
2048
2049 if (!equal)
2050 dev_alert(DEV, "Concurrent writes detected: "
2051 "local=%llus +%u, remote=%llus +%u, "
2052 "assuming %s came first\n",
2053 (unsigned long long)i->sector, i->size,
2054 (unsigned long long)sector, size,
2055 discard ? "local" : "remote");
2056
2057 inc_unacked(mdev);
2058 peer_req->w.cb = discard ? e_send_discard_write :
2059 e_send_retry_write;
2060 list_add_tail(&peer_req->w.list, &mdev->done_ee);
2061 wake_asender(mdev->tconn);
2062
2063 err = -ENOENT;
2064 goto out;
2065 } else {
2066 struct drbd_request *req =
2067 container_of(i, struct drbd_request, i);
2068
2069 if (!equal)
2070 dev_alert(DEV, "Concurrent writes detected: "
2071 "local=%llus +%u, remote=%llus +%u\n",
2072 (unsigned long long)i->sector, i->size,
2073 (unsigned long long)sector, size);
2074
2075 if (req->rq_state & RQ_LOCAL_PENDING ||
2076 !(req->rq_state & RQ_POSTPONED)) {
2077 /*
2078 * Wait for the node with the discard flag to
2079 * decide if this request will be discarded or
2080 * retried. Requests that are discarded will
2081 * disappear from the write_requests tree.
2082 *
2083 * In addition, wait for the conflicting
2084 * request to finish locally before submitting
2085 * the conflicting peer request.
2086 */
2087 err = drbd_wait_misc(mdev, &req->i);
2088 if (err) {
2089 _conn_request_state(mdev->tconn,
2090 NS(conn, C_TIMEOUT),
2091 CS_HARD);
2092 fail_postponed_requests(mdev, sector, size);
2093 goto out;
2094 }
2095 goto repeat;
2096 }
2097 /*
2098 * Remember to restart the conflicting requests after
2099 * the new peer request has completed.
2100 */
2101 peer_req->flags |= EE_RESTART_REQUESTS;
2102 }
2103 }
2104 err = 0;
2105
2106 out:
2107 if (err)
2108 drbd_remove_epoch_entry_interval(mdev, peer_req);
2109 return err;
2110}
2111
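/* Worked example for the containment test used above when this node holds
 * the discard flag (hypothetical intervals):
 *
 *	local request i:  sector 8, size 8192  ->  sectors [8, 24)
 *	peer request:     sector 8, size 4096  ->  sectors [8, 16)
 *
 *	discard = (8 <= 8) && (8 + 16 >= 8 + 8)  ->  true
 *
 * The peer's write is fully covered by our own, so we answer with
 * P_DISCARD_WRITE; had the peer written [8, 32) instead, it would not be
 * contained and we would ask it to retry once our write has completed. */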
b411b363 2112/* mirrored write */
4a76b161 2113static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 2114{
4a76b161 2115 struct drbd_conf *mdev;
b411b363 2116 sector_t sector;
db830c46 2117 struct drbd_peer_request *peer_req;
e658983a 2118 struct p_data *p = pi->data;
7be8da07 2119 u32 peer_seq = be32_to_cpu(p->seq_num);
b411b363
PR
2120 int rw = WRITE;
2121 u32 dp_flags;
302bdeae 2122 int err, tp;
b411b363 2123
4a76b161
AG
2124 mdev = vnr_to_mdev(tconn, pi->vnr);
2125 if (!mdev)
2126 return -EIO;
2127
7be8da07 2128 if (!get_ldev(mdev)) {
82bc0194
AG
2129 int err2;
2130
7be8da07 2131 err = wait_for_and_update_peer_seq(mdev, peer_seq);
e2857216 2132 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
12038a3a 2133 atomic_inc(&tconn->current_epoch->epoch_size);
e2857216 2134 err2 = drbd_drain_block(mdev, pi->size);
82bc0194
AG
2135 if (!err)
2136 err = err2;
2137 return err;
b411b363
PR
2138 }
2139
fcefa62e
AG
2140 /*
2141 * Corresponding put_ldev done either below (on various errors), or in
2142 * drbd_peer_request_endio, if we successfully submit the data at the
2143 * end of this function.
2144 */
b411b363
PR
2145
2146 sector = be64_to_cpu(p->sector);
e2857216 2147 peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
db830c46 2148 if (!peer_req) {
b411b363 2149 put_ldev(mdev);
82bc0194 2150 return -EIO;
b411b363
PR
2151 }
2152
db830c46 2153 peer_req->w.cb = e_end_block;
b411b363 2154
688593c5
LE
2155 dp_flags = be32_to_cpu(p->dp_flags);
2156 rw |= wire_flags_to_bio(mdev, dp_flags);
2157
2158 if (dp_flags & DP_MAY_SET_IN_SYNC)
db830c46 2159 peer_req->flags |= EE_MAY_SET_IN_SYNC;
688593c5 2160
12038a3a
PR
2161 spin_lock(&tconn->epoch_lock);
2162 peer_req->epoch = tconn->current_epoch;
db830c46
AG
2163 atomic_inc(&peer_req->epoch->epoch_size);
2164 atomic_inc(&peer_req->epoch->active);
12038a3a 2165 spin_unlock(&tconn->epoch_lock);
b411b363 2166
302bdeae
PR
2167 rcu_read_lock();
2168 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
2169 rcu_read_unlock();
2170 if (tp) {
2171 peer_req->flags |= EE_IN_INTERVAL_TREE;
7be8da07
AG
2172 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2173 if (err)
b411b363 2174 goto out_interrupted;
87eeee41 2175 spin_lock_irq(&mdev->tconn->req_lock);
7be8da07
AG
2176 err = handle_write_conflicts(mdev, peer_req);
2177 if (err) {
2178 spin_unlock_irq(&mdev->tconn->req_lock);
2179 if (err == -ENOENT) {
b411b363 2180 put_ldev(mdev);
82bc0194 2181 return 0;
b411b363 2182 }
7be8da07 2183 goto out_interrupted;
b411b363 2184 }
7be8da07
AG
2185 } else
2186 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 2187 list_add(&peer_req->w.list, &mdev->active_ee);
87eeee41 2188 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 2189
d93f6302
LE
2190 if (mdev->state.conn == C_SYNC_TARGET)
 2191 		wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, peer_req));
2192
303d1448 2193 if (mdev->tconn->agreed_pro_version < 100) {
44ed167d
PR
2194 rcu_read_lock();
2195 switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) {
303d1448
PR
2196 case DRBD_PROT_C:
2197 dp_flags |= DP_SEND_WRITE_ACK;
2198 break;
2199 case DRBD_PROT_B:
2200 dp_flags |= DP_SEND_RECEIVE_ACK;
2201 break;
2202 }
44ed167d 2203 rcu_read_unlock();
303d1448
PR
2204 }
2205
2206 if (dp_flags & DP_SEND_WRITE_ACK) {
2207 peer_req->flags |= EE_SEND_WRITE_ACK;
b411b363
PR
2208 inc_unacked(mdev);
 2209 		/* corresponding dec_unacked() in e_end_block()
 2210 		 * or in _drbd_clear_done_ee */
303d1448
PR
2211 }
2212
2213 if (dp_flags & DP_SEND_RECEIVE_ACK) {
b411b363
PR
2214 /* I really don't like it that the receiver thread
2215 * sends on the msock, but anyways */
db830c46 2216 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
b411b363
PR
2217 }
2218
6719fb03 2219 if (mdev->state.pdsk < D_INCONSISTENT) {
b411b363 2220 		/* In case we have the only disk of the cluster: mark the range out of sync and cover it by the activity log */
db830c46
AG
2221 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2222 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2223 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
181286ad 2224 drbd_al_begin_io(mdev, &peer_req->i);
b411b363
PR
2225 }
2226
82bc0194
AG
2227 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
2228 if (!err)
2229 return 0;
b411b363 2230
10f6d992
LE
2231 /* don't care for the reason here */
2232 dev_err(DEV, "submit failed, triggering re-connect\n");
87eeee41 2233 spin_lock_irq(&mdev->tconn->req_lock);
db830c46
AG
2234 list_del(&peer_req->w.list);
2235 drbd_remove_epoch_entry_interval(mdev, peer_req);
87eeee41 2236 spin_unlock_irq(&mdev->tconn->req_lock);
db830c46 2237 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
181286ad 2238 drbd_al_complete_io(mdev, &peer_req->i);
22cc37a9 2239
b411b363 2240out_interrupted:
1e9dd291 2241 drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP);
b411b363 2242 put_ldev(mdev);
3967deb1 2243 drbd_free_peer_req(mdev, peer_req);
82bc0194 2244 return err;
b411b363
PR
2245}
2246
0f0601f4
LE
2247/* We may throttle resync, if the lower device seems to be busy,
2248 * and current sync rate is above c_min_rate.
2249 *
2250 * To decide whether or not the lower device is busy, we use a scheme similar
 2251 * to MD RAID is_mddev_idle(): if the partition stats reveal a "significant"
 2252 * amount (more than 64 sectors) of activity that we cannot account for with
 2253 * our own resync activity, it obviously is "busy".
2254 *
2255 * The current sync rate used here uses only the most recent two step marks,
2256 * to have a short time average so we can react faster.
2257 */
e3555d85 2258int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
0f0601f4
LE
2259{
2260 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2261 unsigned long db, dt, dbdt;
e3555d85 2262 struct lc_element *tmp;
0f0601f4
LE
2263 int curr_events;
2264 int throttle = 0;
daeda1cc
PR
2265 unsigned int c_min_rate;
2266
2267 rcu_read_lock();
2268 c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
2269 rcu_read_unlock();
0f0601f4
LE
2270
2271 /* feature disabled? */
daeda1cc 2272 if (c_min_rate == 0)
0f0601f4
LE
2273 return 0;
2274
e3555d85
PR
2275 spin_lock_irq(&mdev->al_lock);
2276 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2277 if (tmp) {
2278 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2279 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2280 spin_unlock_irq(&mdev->al_lock);
2281 return 0;
2282 }
2283 /* Do not slow down if app IO is already waiting for this extent */
2284 }
2285 spin_unlock_irq(&mdev->al_lock);
2286
0f0601f4
LE
2287 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2288 (int)part_stat_read(&disk->part0, sectors[1]) -
2289 atomic_read(&mdev->rs_sect_ev);
e3555d85 2290
0f0601f4
LE
2291 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2292 unsigned long rs_left;
2293 int i;
2294
2295 mdev->rs_last_events = curr_events;
2296
2297 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2298 * approx. */
2649f080
LE
2299 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2300
2301 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2302 rs_left = mdev->ov_left;
2303 else
2304 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
0f0601f4
LE
2305
2306 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2307 if (!dt)
2308 dt++;
2309 db = mdev->rs_mark_left[i] - rs_left;
2310 dbdt = Bit2KB(db/dt);
2311
daeda1cc 2312 if (dbdt > c_min_rate)
0f0601f4
LE
2313 throttle = 1;
2314 }
2315 return throttle;
2316}
2317
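/* Numeric sketch of the rate estimate above (hypothetical numbers, and
 * assuming the usual 4 KiB of backing storage per bitmap bit, which is what
 * Bit2KB() converts):
 *
 *	db   = rs_mark_left[i] - rs_left = 2560 bits cleared
 *	dt   = 1 second since that sync mark
 *	dbdt = Bit2KB(2560 / 1) = 10240 KiB/s  (~10 MiB/s)
 *
 * With c_min_rate configured at, say, 4000 KiB/s, the resync is already above
 * its guaranteed minimum; together with the >64-sector unaccounted-activity
 * check above, we report "throttle" and let application I/O on the busy
 * lower device win. */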
2318
4a76b161 2319static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 2320{
4a76b161 2321 struct drbd_conf *mdev;
b411b363 2322 sector_t sector;
4a76b161 2323 sector_t capacity;
db830c46 2324 struct drbd_peer_request *peer_req;
b411b363 2325 struct digest_info *di = NULL;
b18b37be 2326 int size, verb;
b411b363 2327 unsigned int fault_type;
e658983a 2328 struct p_block_req *p = pi->data;
4a76b161
AG
2329
2330 mdev = vnr_to_mdev(tconn, pi->vnr);
2331 if (!mdev)
2332 return -EIO;
2333 capacity = drbd_get_capacity(mdev->this_bdev);
b411b363
PR
2334
2335 sector = be64_to_cpu(p->sector);
2336 size = be32_to_cpu(p->blksize);
2337
c670a398 2338 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
b411b363
PR
2339 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2340 (unsigned long long)sector, size);
82bc0194 2341 return -EINVAL;
b411b363
PR
2342 }
2343 if (sector + (size>>9) > capacity) {
2344 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2345 (unsigned long long)sector, size);
82bc0194 2346 return -EINVAL;
b411b363
PR
2347 }
2348
2349 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
b18b37be 2350 verb = 1;
e2857216 2351 switch (pi->cmd) {
b18b37be
PR
2352 case P_DATA_REQUEST:
2353 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2354 break;
2355 case P_RS_DATA_REQUEST:
2356 case P_CSUM_RS_REQUEST:
2357 case P_OV_REQUEST:
 2358 			drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY, p);
2359 break;
2360 case P_OV_REPLY:
2361 verb = 0;
2362 dec_rs_pending(mdev);
2363 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2364 break;
2365 default:
49ba9b1b 2366 BUG();
b18b37be
PR
2367 }
2368 if (verb && __ratelimit(&drbd_ratelimit_state))
b411b363
PR
2369 dev_err(DEV, "Can not satisfy peer's read request, "
2370 "no local data.\n");
b18b37be 2371
a821cc4a 2372 		/* drain a possibly present payload */
e2857216 2373 return drbd_drain_block(mdev, pi->size);
b411b363
PR
2374 }
2375
2376 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2377 * "criss-cross" setup, that might cause write-out on some other DRBD,
2378 * which in turn might block on the other node at this very place. */
0db55363 2379 peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO);
db830c46 2380 if (!peer_req) {
b411b363 2381 put_ldev(mdev);
82bc0194 2382 return -ENOMEM;
b411b363
PR
2383 }
2384
e2857216 2385 switch (pi->cmd) {
b411b363 2386 case P_DATA_REQUEST:
db830c46 2387 peer_req->w.cb = w_e_end_data_req;
b411b363 2388 fault_type = DRBD_FAULT_DT_RD;
80a40e43
LE
2389 /* application IO, don't drbd_rs_begin_io */
2390 goto submit;
2391
b411b363 2392 case P_RS_DATA_REQUEST:
db830c46 2393 peer_req->w.cb = w_e_end_rsdata_req;
b411b363 2394 fault_type = DRBD_FAULT_RS_RD;
5f9915bb
LE
2395 /* used in the sector offset progress display */
2396 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
b411b363
PR
2397 break;
2398
2399 case P_OV_REPLY:
2400 case P_CSUM_RS_REQUEST:
2401 fault_type = DRBD_FAULT_RS_RD;
e2857216 2402 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
b411b363
PR
2403 if (!di)
2404 goto out_free_e;
2405
e2857216 2406 di->digest_size = pi->size;
b411b363
PR
2407 di->digest = (((char *)di)+sizeof(struct digest_info));
2408
db830c46
AG
2409 peer_req->digest = di;
2410 peer_req->flags |= EE_HAS_DIGEST;
c36c3ced 2411
e2857216 2412 if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
b411b363
PR
2413 goto out_free_e;
2414
e2857216 2415 if (pi->cmd == P_CSUM_RS_REQUEST) {
31890f4a 2416 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
db830c46 2417 peer_req->w.cb = w_e_end_csum_rs_req;
5f9915bb
LE
2418 /* used in the sector offset progress display */
2419 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
e2857216 2420 } else if (pi->cmd == P_OV_REPLY) {
2649f080
LE
2421 /* track progress, we may need to throttle */
2422 atomic_add(size >> 9, &mdev->rs_sect_in);
db830c46 2423 peer_req->w.cb = w_e_end_ov_reply;
b411b363 2424 dec_rs_pending(mdev);
0f0601f4
LE
2425 /* drbd_rs_begin_io done when we sent this request,
2426 * but accounting still needs to be done. */
2427 goto submit_for_resync;
b411b363
PR
2428 }
2429 break;
2430
2431 case P_OV_REQUEST:
b411b363 2432 if (mdev->ov_start_sector == ~(sector_t)0 &&
31890f4a 2433 mdev->tconn->agreed_pro_version >= 90) {
de228bba
LE
2434 unsigned long now = jiffies;
2435 int i;
b411b363
PR
2436 mdev->ov_start_sector = sector;
2437 mdev->ov_position = sector;
30b743a2
LE
2438 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2439 mdev->rs_total = mdev->ov_left;
de228bba
LE
2440 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2441 mdev->rs_mark_left[i] = mdev->ov_left;
2442 mdev->rs_mark_time[i] = now;
2443 }
b411b363
PR
2444 dev_info(DEV, "Online Verify start sector: %llu\n",
2445 (unsigned long long)sector);
2446 }
db830c46 2447 peer_req->w.cb = w_e_end_ov_req;
b411b363 2448 fault_type = DRBD_FAULT_RS_RD;
b411b363
PR
2449 break;
2450
b411b363 2451 default:
49ba9b1b 2452 BUG();
b411b363
PR
2453 }
2454
0f0601f4
LE
2455 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2456 * wrt the receiver, but it is not as straightforward as it may seem.
2457 * Various places in the resync start and stop logic assume resync
2458 * requests are processed in order, requeuing this on the worker thread
2459 * introduces a bunch of new code for synchronization between threads.
2460 *
2461 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2462 * "forever", throttling after drbd_rs_begin_io will lock that extent
2463 * for application writes for the same time. For now, just throttle
2464 * here, where the rest of the code expects the receiver to sleep for
2465 * a while, anyways.
2466 */
2467
2468 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2469 * this defers syncer requests for some time, before letting at least
 2470 	 * one request through. The resync controller on the receiving side
2471 * will adapt to the incoming rate accordingly.
2472 *
2473 * We cannot throttle here if remote is Primary/SyncTarget:
2474 * we would also throttle its application reads.
2475 * In that case, throttling is done on the SyncTarget only.
2476 */
e3555d85
PR
2477 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2478 schedule_timeout_uninterruptible(HZ/10);
2479 if (drbd_rs_begin_io(mdev, sector))
80a40e43 2480 goto out_free_e;
b411b363 2481
0f0601f4
LE
2482submit_for_resync:
2483 atomic_add(size >> 9, &mdev->rs_sect_ev);
2484
80a40e43 2485submit:
b411b363 2486 inc_unacked(mdev);
87eeee41 2487 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 2488 list_add_tail(&peer_req->w.list, &mdev->read_ee);
87eeee41 2489 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 2490
fbe29dec 2491 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
82bc0194 2492 return 0;
b411b363 2493
10f6d992
LE
2494 /* don't care for the reason here */
2495 dev_err(DEV, "submit failed, triggering re-connect\n");
87eeee41 2496 spin_lock_irq(&mdev->tconn->req_lock);
db830c46 2497 list_del(&peer_req->w.list);
87eeee41 2498 spin_unlock_irq(&mdev->tconn->req_lock);
22cc37a9
LE
2499 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2500
b411b363 2501out_free_e:
b411b363 2502 put_ldev(mdev);
3967deb1 2503 drbd_free_peer_req(mdev, peer_req);
82bc0194 2504 return -EIO;
b411b363
PR
2505}
2506
2507static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2508{
2509 int self, peer, rv = -100;
2510 unsigned long ch_self, ch_peer;
44ed167d 2511 enum drbd_after_sb_p after_sb_0p;
b411b363
PR
2512
2513 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2514 peer = mdev->p_uuid[UI_BITMAP] & 1;
2515
2516 ch_peer = mdev->p_uuid[UI_SIZE];
2517 ch_self = mdev->comm_bm_set;
2518
44ed167d
PR
2519 rcu_read_lock();
2520 after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p;
2521 rcu_read_unlock();
2522 switch (after_sb_0p) {
b411b363
PR
2523 case ASB_CONSENSUS:
2524 case ASB_DISCARD_SECONDARY:
2525 case ASB_CALL_HELPER:
44ed167d 2526 case ASB_VIOLENTLY:
b411b363
PR
2527 dev_err(DEV, "Configuration error.\n");
2528 break;
2529 case ASB_DISCONNECT:
2530 break;
2531 case ASB_DISCARD_YOUNGER_PRI:
2532 if (self == 0 && peer == 1) {
2533 rv = -1;
2534 break;
2535 }
2536 if (self == 1 && peer == 0) {
2537 rv = 1;
2538 break;
2539 }
2540 /* Else fall through to one of the other strategies... */
2541 case ASB_DISCARD_OLDER_PRI:
2542 if (self == 0 && peer == 1) {
2543 rv = 1;
2544 break;
2545 }
2546 if (self == 1 && peer == 0) {
2547 rv = -1;
2548 break;
2549 }
2550 /* Else fall through to one of the other strategies... */
ad19bf6e 2551 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
b411b363
PR
2552 "Using discard-least-changes instead\n");
2553 case ASB_DISCARD_ZERO_CHG:
2554 if (ch_peer == 0 && ch_self == 0) {
25703f83 2555 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
b411b363
PR
2556 ? -1 : 1;
2557 break;
2558 } else {
2559 if (ch_peer == 0) { rv = 1; break; }
2560 if (ch_self == 0) { rv = -1; break; }
2561 }
44ed167d 2562 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
b411b363
PR
2563 break;
2564 case ASB_DISCARD_LEAST_CHG:
2565 if (ch_self < ch_peer)
2566 rv = -1;
2567 else if (ch_self > ch_peer)
2568 rv = 1;
2569 else /* ( ch_self == ch_peer ) */
2570 /* Well, then use something else. */
25703f83 2571 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
b411b363
PR
2572 ? -1 : 1;
2573 break;
2574 case ASB_DISCARD_LOCAL:
2575 rv = -1;
2576 break;
2577 case ASB_DISCARD_REMOTE:
2578 rv = 1;
2579 }
2580
2581 return rv;
2582}
2583
2584static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2585{
6184ea21 2586 int hg, rv = -100;
44ed167d 2587 enum drbd_after_sb_p after_sb_1p;
b411b363 2588
44ed167d
PR
2589 rcu_read_lock();
2590 after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p;
2591 rcu_read_unlock();
2592 switch (after_sb_1p) {
b411b363
PR
2593 case ASB_DISCARD_YOUNGER_PRI:
2594 case ASB_DISCARD_OLDER_PRI:
2595 case ASB_DISCARD_LEAST_CHG:
2596 case ASB_DISCARD_LOCAL:
2597 case ASB_DISCARD_REMOTE:
44ed167d 2598 case ASB_DISCARD_ZERO_CHG:
b411b363
PR
2599 dev_err(DEV, "Configuration error.\n");
2600 break;
2601 case ASB_DISCONNECT:
2602 break;
2603 case ASB_CONSENSUS:
2604 hg = drbd_asb_recover_0p(mdev);
2605 if (hg == -1 && mdev->state.role == R_SECONDARY)
2606 rv = hg;
2607 if (hg == 1 && mdev->state.role == R_PRIMARY)
2608 rv = hg;
2609 break;
2610 case ASB_VIOLENTLY:
2611 rv = drbd_asb_recover_0p(mdev);
2612 break;
2613 case ASB_DISCARD_SECONDARY:
2614 return mdev->state.role == R_PRIMARY ? 1 : -1;
2615 case ASB_CALL_HELPER:
2616 hg = drbd_asb_recover_0p(mdev);
2617 if (hg == -1 && mdev->state.role == R_PRIMARY) {
bb437946
AG
2618 enum drbd_state_rv rv2;
2619
2620 drbd_set_role(mdev, R_SECONDARY, 0);
b411b363
PR
2621 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2622 * we might be here in C_WF_REPORT_PARAMS which is transient.
2623 * we do not need to wait for the after state change work either. */
bb437946
AG
2624 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2625 if (rv2 != SS_SUCCESS) {
b411b363
PR
2626 drbd_khelper(mdev, "pri-lost-after-sb");
2627 } else {
2628 dev_warn(DEV, "Successfully gave up primary role.\n");
2629 rv = hg;
2630 }
2631 } else
2632 rv = hg;
2633 }
2634
2635 return rv;
2636}
2637
2638static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2639{
6184ea21 2640 int hg, rv = -100;
44ed167d 2641 enum drbd_after_sb_p after_sb_2p;
b411b363 2642
44ed167d
PR
2643 rcu_read_lock();
2644 after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p;
2645 rcu_read_unlock();
2646 switch (after_sb_2p) {
b411b363
PR
2647 case ASB_DISCARD_YOUNGER_PRI:
2648 case ASB_DISCARD_OLDER_PRI:
2649 case ASB_DISCARD_LEAST_CHG:
2650 case ASB_DISCARD_LOCAL:
2651 case ASB_DISCARD_REMOTE:
2652 case ASB_CONSENSUS:
2653 case ASB_DISCARD_SECONDARY:
44ed167d 2654 case ASB_DISCARD_ZERO_CHG:
b411b363
PR
2655 dev_err(DEV, "Configuration error.\n");
2656 break;
2657 case ASB_VIOLENTLY:
2658 rv = drbd_asb_recover_0p(mdev);
2659 break;
2660 case ASB_DISCONNECT:
2661 break;
2662 case ASB_CALL_HELPER:
2663 hg = drbd_asb_recover_0p(mdev);
2664 if (hg == -1) {
bb437946
AG
2665 enum drbd_state_rv rv2;
2666
b411b363
PR
2667 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2668 * we might be here in C_WF_REPORT_PARAMS which is transient.
2669 * we do not need to wait for the after state change work either. */
bb437946
AG
2670 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2671 if (rv2 != SS_SUCCESS) {
b411b363
PR
2672 drbd_khelper(mdev, "pri-lost-after-sb");
2673 } else {
2674 dev_warn(DEV, "Successfully gave up primary role.\n");
2675 rv = hg;
2676 }
2677 } else
2678 rv = hg;
2679 }
2680
2681 return rv;
2682}
2683
2684static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2685 u64 bits, u64 flags)
2686{
2687 if (!uuid) {
2688 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2689 return;
2690 }
2691 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2692 text,
2693 (unsigned long long)uuid[UI_CURRENT],
2694 (unsigned long long)uuid[UI_BITMAP],
2695 (unsigned long long)uuid[UI_HISTORY_START],
2696 (unsigned long long)uuid[UI_HISTORY_END],
2697 (unsigned long long)bits,
2698 (unsigned long long)flags);
2699}
2700
2701/*
2702 100 after split brain try auto recover
2703 2 C_SYNC_SOURCE set BitMap
2704 1 C_SYNC_SOURCE use BitMap
2705 0 no Sync
2706 -1 C_SYNC_TARGET use BitMap
2707 -2 C_SYNC_TARGET set BitMap
2708 -100 after split brain, disconnect
2709-1000 unrelated data
4a23f264
PR
2710-1091 requires proto 91
2711-1096 requires proto 96
b411b363
PR
2712 */
2713static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2714{
2715 u64 self, peer;
2716 int i, j;
2717
2718 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2719 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2720
2721 *rule_nr = 10;
2722 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2723 return 0;
2724
2725 *rule_nr = 20;
2726 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2727 peer != UUID_JUST_CREATED)
2728 return -2;
2729
2730 *rule_nr = 30;
2731 if (self != UUID_JUST_CREATED &&
2732 (peer == UUID_JUST_CREATED || peer == (u64)0))
2733 return 2;
2734
2735 if (self == peer) {
2736 int rct, dc; /* roles at crash time */
2737
2738 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2739
31890f4a 2740 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2741 return -1091;
b411b363
PR
2742
2743 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2744 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2745 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2746 drbd_uuid_set_bm(mdev, 0UL);
2747
2748 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2749 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2750 *rule_nr = 34;
2751 } else {
2752 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2753 *rule_nr = 36;
2754 }
2755
2756 return 1;
2757 }
2758
2759 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2760
31890f4a 2761 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2762 return -1091;
b411b363
PR
2763
2764 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2765 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2766 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2767
2768 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2769 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2770 mdev->p_uuid[UI_BITMAP] = 0UL;
2771
2772 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2773 *rule_nr = 35;
2774 } else {
2775 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2776 *rule_nr = 37;
2777 }
2778
2779 return -1;
2780 }
2781
2782 /* Common power [off|failure] */
2783 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2784 (mdev->p_uuid[UI_FLAGS] & 2);
2785 /* lowest bit is set when we were primary,
2786 * next bit (weight 2) is set when peer was primary */
2787 *rule_nr = 40;
2788
2789 switch (rct) {
2790 case 0: /* !self_pri && !peer_pri */ return 0;
2791 case 1: /* self_pri && !peer_pri */ return 1;
2792 case 2: /* !self_pri && peer_pri */ return -1;
2793 case 3: /* self_pri && peer_pri */
25703f83 2794 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
b411b363
PR
2795 return dc ? -1 : 1;
2796 }
2797 }
2798
2799 *rule_nr = 50;
2800 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2801 if (self == peer)
2802 return -1;
2803
2804 *rule_nr = 51;
2805 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2806 if (self == peer) {
31890f4a 2807 if (mdev->tconn->agreed_pro_version < 96 ?
4a23f264
PR
2808 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2809 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2810 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
 2811 			/* The last P_SYNC_UUID did not get through. Undo the modifications
 2812 			   the peer made to its UUIDs when it last started a resync as sync source. */
2813
31890f4a 2814 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2815 return -1091;
b411b363
PR
2816
2817 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2818 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
4a23f264
PR
2819
2820 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
2821 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2822
b411b363
PR
2823 return -1;
2824 }
2825 }
2826
2827 *rule_nr = 60;
2828 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2829 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2830 peer = mdev->p_uuid[i] & ~((u64)1);
2831 if (self == peer)
2832 return -2;
2833 }
2834
2835 *rule_nr = 70;
2836 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2837 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2838 if (self == peer)
2839 return 1;
2840
2841 *rule_nr = 71;
2842 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2843 if (self == peer) {
31890f4a 2844 if (mdev->tconn->agreed_pro_version < 96 ?
4a23f264
PR
2845 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2846 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2847 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
b411b363
PR
 2848 			/* The last P_SYNC_UUID did not get through. Undo the modifications
 2849 			   we made to our UUIDs when we last started a resync as sync source. */
2850
31890f4a 2851 if (mdev->tconn->agreed_pro_version < 91)
4a23f264 2852 return -1091;
b411b363
PR
2853
2854 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2855 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2856
4a23f264 2857 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
b411b363
PR
2858 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2859 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2860
2861 return 1;
2862 }
2863 }
2864
2865
2866 *rule_nr = 80;
d8c2a36b 2867 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
b411b363
PR
2868 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2869 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2870 if (self == peer)
2871 return 2;
2872 }
2873
2874 *rule_nr = 90;
2875 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2876 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2877 if (self == peer && self != ((u64)0))
2878 return 100;
2879
2880 *rule_nr = 100;
2881 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2882 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2883 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2884 peer = mdev->p_uuid[j] & ~((u64)1);
2885 if (self == peer)
2886 return -100;
2887 }
2888 }
2889
2890 return -1000;
2891}
2892
2893/* drbd_sync_handshake() returns the new conn state on success, or
2894 CONN_MASK (-1) on failure.
2895 */
2896static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2897 enum drbd_disk_state peer_disk) __must_hold(local)
2898{
b411b363
PR
2899 enum drbd_conns rv = C_MASK;
2900 enum drbd_disk_state mydisk;
44ed167d 2901 struct net_conf *nc;
6dff2902 2902 int hg, rule_nr, rr_conflict, tentative;
b411b363
PR
2903
2904 mydisk = mdev->state.disk;
2905 if (mydisk == D_NEGOTIATING)
2906 mydisk = mdev->new_state_tmp.disk;
2907
2908 dev_info(DEV, "drbd_sync_handshake:\n");
2909 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2910 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2911 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2912
2913 hg = drbd_uuid_compare(mdev, &rule_nr);
2914
2915 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2916
2917 if (hg == -1000) {
2918 dev_alert(DEV, "Unrelated data, aborting!\n");
2919 return C_MASK;
2920 }
4a23f264
PR
2921 if (hg < -1000) {
2922 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
b411b363
PR
2923 return C_MASK;
2924 }
2925
2926 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2927 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2928 int f = (hg == -100) || abs(hg) == 2;
2929 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2930 if (f)
2931 hg = hg*2;
2932 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2933 hg > 0 ? "source" : "target");
2934 }
2935
3a11a487
AG
2936 if (abs(hg) == 100)
2937 drbd_khelper(mdev, "initial-split-brain");
2938
44ed167d
PR
2939 rcu_read_lock();
2940 nc = rcu_dereference(mdev->tconn->net_conf);
2941
2942 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
b411b363
PR
2943 int pcount = (mdev->state.role == R_PRIMARY)
2944 + (peer_role == R_PRIMARY);
2945 int forced = (hg == -100);
2946
2947 switch (pcount) {
2948 case 0:
2949 hg = drbd_asb_recover_0p(mdev);
2950 break;
2951 case 1:
2952 hg = drbd_asb_recover_1p(mdev);
2953 break;
2954 case 2:
2955 hg = drbd_asb_recover_2p(mdev);
2956 break;
2957 }
2958 if (abs(hg) < 100) {
2959 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2960 "automatically solved. Sync from %s node\n",
2961 pcount, (hg < 0) ? "peer" : "this");
2962 if (forced) {
2963 dev_warn(DEV, "Doing a full sync, since"
2964 " UUIDs where ambiguous.\n");
2965 hg = hg*2;
2966 }
2967 }
2968 }
2969
2970 if (hg == -100) {
08b165ba 2971 if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1))
b411b363 2972 hg = -1;
08b165ba 2973 if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1))
b411b363
PR
2974 hg = 1;
2975
2976 if (abs(hg) < 100)
2977 dev_warn(DEV, "Split-Brain detected, manually solved. "
2978 "Sync from %s node\n",
2979 (hg < 0) ? "peer" : "this");
2980 }
44ed167d 2981 rr_conflict = nc->rr_conflict;
6dff2902 2982 tentative = nc->tentative;
44ed167d 2983 rcu_read_unlock();
b411b363
PR
2984
2985 if (hg == -100) {
580b9767
LE
2986 /* FIXME this log message is not correct if we end up here
2987 * after an attempted attach on a diskless node.
2988 * We just refuse to attach -- well, we drop the "connection"
2989 * to that disk, in a way... */
3a11a487 2990 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
b411b363
PR
2991 drbd_khelper(mdev, "split-brain");
2992 return C_MASK;
2993 }
2994
2995 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2996 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2997 return C_MASK;
2998 }
2999
3000 if (hg < 0 && /* by intention we do not use mydisk here. */
3001 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
44ed167d 3002 switch (rr_conflict) {
b411b363
PR
3003 case ASB_CALL_HELPER:
3004 drbd_khelper(mdev, "pri-lost");
3005 /* fall through */
3006 case ASB_DISCONNECT:
3007 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
3008 return C_MASK;
3009 case ASB_VIOLENTLY:
3010 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
3011 "assumption\n");
3012 }
3013 }
3014
6dff2902 3015 if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
cf14c2e9
PR
3016 if (hg == 0)
3017 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
3018 else
3019 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
3020 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3021 abs(hg) >= 2 ? "full" : "bit-map based");
3022 return C_MASK;
3023 }
3024
b411b363
PR
3025 if (abs(hg) >= 2) {
3026 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
20ceb2b2
LE
3027 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
3028 BM_LOCKED_SET_ALLOWED))
b411b363
PR
3029 return C_MASK;
3030 }
3031
3032 if (hg > 0) { /* become sync source. */
3033 rv = C_WF_BITMAP_S;
3034 } else if (hg < 0) { /* become sync target */
3035 rv = C_WF_BITMAP_T;
3036 } else {
3037 rv = C_CONNECTED;
3038 if (drbd_bm_total_weight(mdev)) {
3039 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
3040 drbd_bm_total_weight(mdev));
3041 }
3042 }
3043
3044 return rv;
3045}
3046
f179d76d 3047static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
b411b363
PR
3048{
3049 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
f179d76d
PR
3050 if (peer == ASB_DISCARD_REMOTE)
3051 return ASB_DISCARD_LOCAL;
b411b363
PR
3052
3053 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
f179d76d
PR
3054 if (peer == ASB_DISCARD_LOCAL)
3055 return ASB_DISCARD_REMOTE;
b411b363
PR
3056
3057 /* everything else is valid if they are equal on both sides. */
f179d76d 3058 return peer;
b411b363
PR
3059}
3060
e2857216 3061static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3062{
e658983a 3063 struct p_protocol *p = pi->data;
036b17ea
PR
3064 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3065 int p_proto, p_discard_my_data, p_two_primaries, cf;
3066 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3067 char integrity_alg[SHARED_SECRET_MAX] = "";
accdbcc5 3068 struct crypto_hash *peer_integrity_tfm = NULL;
7aca6c75 3069 void *int_dig_in = NULL, *int_dig_vv = NULL;
b411b363 3070
b411b363
PR
3071 p_proto = be32_to_cpu(p->protocol);
3072 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3073 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3074 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
b411b363 3075 p_two_primaries = be32_to_cpu(p->two_primaries);
cf14c2e9 3076 cf = be32_to_cpu(p->conn_flags);
6139f60d 3077 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
cf14c2e9 3078
86db0618
AG
3079 if (tconn->agreed_pro_version >= 87) {
3080 int err;
3081
88104ca4 3082 if (pi->size > sizeof(integrity_alg))
86db0618 3083 return -EIO;
88104ca4 3084 err = drbd_recv_all(tconn, integrity_alg, pi->size);
86db0618
AG
3085 if (err)
3086 return err;
036b17ea
PR
3087 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
3088 }
88104ca4 3089
7d4c782c 3090 if (pi->cmd != P_PROTOCOL_UPDATE) {
fbc12f45 3091 clear_bit(CONN_DRY_RUN, &tconn->flags);
036b17ea 3092
fbc12f45
AG
3093 if (cf & CF_DRY_RUN)
3094 set_bit(CONN_DRY_RUN, &tconn->flags);
cf14c2e9 3095
fbc12f45
AG
3096 rcu_read_lock();
3097 nc = rcu_dereference(tconn->net_conf);
b411b363 3098
fbc12f45 3099 if (p_proto != nc->wire_protocol) {
d505d9be 3100 conn_err(tconn, "incompatible %s settings\n", "protocol");
fbc12f45
AG
3101 goto disconnect_rcu_unlock;
3102 }
44ed167d 3103
fbc12f45 3104 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
d505d9be 3105 conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
fbc12f45
AG
3106 goto disconnect_rcu_unlock;
3107 }
b411b363 3108
fbc12f45 3109 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
d505d9be 3110 conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri");
fbc12f45
AG
3111 goto disconnect_rcu_unlock;
3112 }
b411b363 3113
fbc12f45 3114 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
d505d9be 3115 conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri");
fbc12f45
AG
3116 goto disconnect_rcu_unlock;
3117 }
b411b363 3118
fbc12f45 3119 if (p_discard_my_data && nc->discard_my_data) {
d505d9be 3120 conn_err(tconn, "incompatible %s settings\n", "discard-my-data");
fbc12f45
AG
3121 goto disconnect_rcu_unlock;
3122 }
b411b363 3123
fbc12f45 3124 if (p_two_primaries != nc->two_primaries) {
d505d9be 3125 conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries");
fbc12f45
AG
3126 goto disconnect_rcu_unlock;
3127 }
b411b363 3128
fbc12f45 3129 if (strcmp(integrity_alg, nc->integrity_alg)) {
d505d9be 3130 conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg");
fbc12f45
AG
3131 goto disconnect_rcu_unlock;
3132 }
b411b363 3133
fbc12f45 3134 rcu_read_unlock();
036b17ea 3135 }
7d4c782c
AG
3136
3137 if (integrity_alg[0]) {
3138 int hash_size;
3139
3140 /*
3141 * We can only change the peer data integrity algorithm
3142 * here. Changing our own data integrity algorithm
3143 * requires that we send a P_PROTOCOL_UPDATE packet at
3144 * the same time; otherwise, the peer has no way to
3145 * tell between which packets the algorithm should
3146 * change.
3147 */
3148
3149 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3150 if (!peer_integrity_tfm) {
3151 conn_err(tconn, "peer data-integrity-alg %s not supported\n",
3152 integrity_alg);
3153 goto disconnect;
3154 }
3155
3156 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3157 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3158 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3159 if (!(int_dig_in && int_dig_vv)) {
3160 conn_err(tconn, "Allocation of buffers for data integrity checking failed\n");
3161 goto disconnect;
3162 }
3163 }
3164
3165 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3166 if (!new_net_conf) {
3167 conn_err(tconn, "Allocation of new net_conf failed\n");
3168 goto disconnect;
3169 }
3170
3171 mutex_lock(&tconn->data.mutex);
3172 mutex_lock(&tconn->conf_update);
3173 old_net_conf = tconn->net_conf;
3174 *new_net_conf = *old_net_conf;
3175
3176 new_net_conf->wire_protocol = p_proto;
3177 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3178 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3179 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3180 new_net_conf->two_primaries = p_two_primaries;
3181
3182 rcu_assign_pointer(tconn->net_conf, new_net_conf);
3183 mutex_unlock(&tconn->conf_update);
3184 mutex_unlock(&tconn->data.mutex);
3185
3186 crypto_free_hash(tconn->peer_integrity_tfm);
3187 kfree(tconn->int_dig_in);
3188 kfree(tconn->int_dig_vv);
3189 tconn->peer_integrity_tfm = peer_integrity_tfm;
3190 tconn->int_dig_in = int_dig_in;
3191 tconn->int_dig_vv = int_dig_vv;
3192
3193 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
3194 conn_info(tconn, "peer data-integrity-alg: %s\n",
3195 integrity_alg[0] ? integrity_alg : "(none)");
3196
3197 synchronize_rcu();
3198 kfree(old_net_conf);
82bc0194 3199 return 0;
b411b363 3200
44ed167d
PR
3201disconnect_rcu_unlock:
3202 rcu_read_unlock();
b411b363 3203disconnect:
b792c35c 3204 crypto_free_hash(peer_integrity_tfm);
036b17ea
PR
3205 kfree(int_dig_in);
3206 kfree(int_dig_vv);
7204624c 3207 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3208 return -EIO;
b411b363
PR
3209}
3210
3211/* helper function
3212 * input: alg name, feature name
3213 * return: NULL (alg name was "")
3214 * ERR_PTR(error) if something goes wrong
3215 * or the crypto hash ptr, if it worked out ok. */
3216struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
3217 const char *alg, const char *name)
3218{
3219 struct crypto_hash *tfm;
3220
3221 if (!alg[0])
3222 return NULL;
3223
3224 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3225 if (IS_ERR(tfm)) {
3226 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3227 alg, name, PTR_ERR(tfm));
3228 return tfm;
3229 }
b411b363
PR
3230 return tfm;
3231}
3232
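/* Typical caller pattern (sketch only, mirroring receive_SyncParam() below):
 *
 *	tfm = drbd_crypto_alloc_digest_safe(mdev, p->verify_alg, "verify-alg");
 *	if (IS_ERR(tfm))
 *		goto disconnect;	// allocation failed, reason already logged
 *	if (tfm == NULL)
 *		;			// empty alg name: feature stays disabled
 *
 * i.e. the empty-string case is not an error, it simply leaves the
 * corresponding hash transform unset. */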
4a76b161
AG
3233static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
3234{
3235 void *buffer = tconn->data.rbuf;
3236 int size = pi->size;
3237
3238 while (size) {
3239 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3240 s = drbd_recv(tconn, buffer, s);
3241 if (s <= 0) {
3242 if (s < 0)
3243 return s;
3244 break;
3245 }
3246 size -= s;
3247 }
3248 if (size)
3249 return -EIO;
3250 return 0;
3251}
3252
3253/*
3254 * config_unknown_volume - device configuration command for unknown volume
3255 *
3256 * When a device is added to an existing connection, the node on which the
3257 * device is added first will send configuration commands to its peer but the
3258 * peer will not know about the device yet. It will warn and ignore these
3259 * commands. Once the device is added on the second node, the second node will
3260 * send the same device configuration commands, but in the other direction.
3261 *
3262 * (We can also end up here if drbd is misconfigured.)
3263 */
3264static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
3265{
2fcb8f30
AG
3266 conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n",
3267 cmdname(pi->cmd), pi->vnr);
4a76b161
AG
3268 return ignore_remaining_packet(tconn, pi);
3269}
3270
3271static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3272{
4a76b161 3273 struct drbd_conf *mdev;
e658983a 3274 struct p_rs_param_95 *p;
b411b363
PR
3275 unsigned int header_size, data_size, exp_max_sz;
3276 struct crypto_hash *verify_tfm = NULL;
3277 struct crypto_hash *csums_tfm = NULL;
2ec91e0e 3278 struct net_conf *old_net_conf, *new_net_conf = NULL;
813472ce 3279 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
4a76b161 3280 const int apv = tconn->agreed_pro_version;
813472ce 3281 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
778f271d 3282 int fifo_size = 0;
82bc0194 3283 int err;
b411b363 3284
4a76b161
AG
3285 mdev = vnr_to_mdev(tconn, pi->vnr);
3286 if (!mdev)
3287 return config_unknown_volume(tconn, pi);
3288
b411b363
PR
3289 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3290 : apv == 88 ? sizeof(struct p_rs_param)
3291 + SHARED_SECRET_MAX
8e26f9cc
PR
3292 : apv <= 94 ? sizeof(struct p_rs_param_89)
3293 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
b411b363 3294
e2857216 3295 if (pi->size > exp_max_sz) {
b411b363 3296 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
e2857216 3297 pi->size, exp_max_sz);
82bc0194 3298 return -EIO;
b411b363
PR
3299 }
3300
3301 if (apv <= 88) {
e658983a 3302 header_size = sizeof(struct p_rs_param);
e2857216 3303 data_size = pi->size - header_size;
8e26f9cc 3304 } else if (apv <= 94) {
e658983a 3305 header_size = sizeof(struct p_rs_param_89);
e2857216 3306 data_size = pi->size - header_size;
b411b363 3307 D_ASSERT(data_size == 0);
8e26f9cc 3308 } else {
e658983a 3309 header_size = sizeof(struct p_rs_param_95);
e2857216 3310 data_size = pi->size - header_size;
b411b363
PR
3311 D_ASSERT(data_size == 0);
3312 }
3313
3314 /* initialize verify_alg and csums_alg */
e658983a 3315 p = pi->data;
b411b363
PR
3316 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3317
e658983a 3318 err = drbd_recv_all(mdev->tconn, p, header_size);
82bc0194
AG
3319 if (err)
3320 return err;
b411b363 3321
daeda1cc
PR
3322 mutex_lock(&mdev->tconn->conf_update);
3323 old_net_conf = mdev->tconn->net_conf;
813472ce
PR
3324 if (get_ldev(mdev)) {
3325 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3326 if (!new_disk_conf) {
3327 put_ldev(mdev);
3328 mutex_unlock(&mdev->tconn->conf_update);
3329 dev_err(DEV, "Allocation of new disk_conf failed\n");
3330 return -ENOMEM;
3331 }
daeda1cc 3332
813472ce
PR
3333 old_disk_conf = mdev->ldev->disk_conf;
3334 *new_disk_conf = *old_disk_conf;
3335
6394b935 3336 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
813472ce 3337 }
daeda1cc 3338
b411b363
PR
3339 if (apv >= 88) {
3340 if (apv == 88) {
3341 if (data_size > SHARED_SECRET_MAX) {
3342 dev_err(DEV, "verify-alg too long, "
 3343 "peer wants %u, accepting only %u bytes\n",
3344 data_size, SHARED_SECRET_MAX);
813472ce
PR
3345 err = -EIO;
3346 goto reconnect;
b411b363
PR
3347 }
3348
82bc0194 3349 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
813472ce
PR
3350 if (err)
3351 goto reconnect;
b411b363
PR
3352 /* we expect NUL terminated string */
3353 /* but just in case someone tries to be evil */
3354 D_ASSERT(p->verify_alg[data_size-1] == 0);
3355 p->verify_alg[data_size-1] = 0;
3356
3357 } else /* apv >= 89 */ {
3358 /* we still expect NUL terminated strings */
3359 /* but just in case someone tries to be evil */
3360 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3361 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3362 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3363 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3364 }
3365
2ec91e0e 3366 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
b411b363
PR
3367 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3368 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2ec91e0e 3369 old_net_conf->verify_alg, p->verify_alg);
b411b363
PR
3370 goto disconnect;
3371 }
3372 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3373 p->verify_alg, "verify-alg");
3374 if (IS_ERR(verify_tfm)) {
3375 verify_tfm = NULL;
3376 goto disconnect;
3377 }
3378 }
3379
2ec91e0e 3380 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
b411b363
PR
3381 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3382 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2ec91e0e 3383 old_net_conf->csums_alg, p->csums_alg);
b411b363
PR
3384 goto disconnect;
3385 }
3386 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3387 p->csums_alg, "csums-alg");
3388 if (IS_ERR(csums_tfm)) {
3389 csums_tfm = NULL;
3390 goto disconnect;
3391 }
3392 }
3393
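			/* apv 95+ also carries the dynamic resync controller settings;
			 * if c_plan_ahead changed, the request plan fifo needs to be
			 * re-sized to match */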
813472ce 3394 if (apv > 94 && new_disk_conf) {
daeda1cc
PR
3395 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3396 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3397 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3398 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
778f271d 3399
daeda1cc 3400 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
9958c857 3401 if (fifo_size != mdev->rs_plan_s->size) {
813472ce
PR
3402 new_plan = fifo_alloc(fifo_size);
3403 if (!new_plan) {
778f271d 3404 dev_err(DEV, "kmalloc of fifo_buffer failed");
f399002e 3405 put_ldev(mdev);
778f271d
PR
3406 goto disconnect;
3407 }
3408 }
8e26f9cc 3409 }
b411b363 3410
91fd4dad 3411 if (verify_tfm || csums_tfm) {
2ec91e0e
PR
3412 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3413 if (!new_net_conf) {
91fd4dad
PR
3414 dev_err(DEV, "Allocation of new net_conf failed\n");
3415 goto disconnect;
3416 }
3417
2ec91e0e 3418 *new_net_conf = *old_net_conf;
91fd4dad
PR
3419
3420 if (verify_tfm) {
2ec91e0e
PR
3421 strcpy(new_net_conf->verify_alg, p->verify_alg);
3422 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
91fd4dad
PR
3423 crypto_free_hash(mdev->tconn->verify_tfm);
3424 mdev->tconn->verify_tfm = verify_tfm;
3425 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3426 }
3427 if (csums_tfm) {
2ec91e0e
PR
3428 strcpy(new_net_conf->csums_alg, p->csums_alg);
3429 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
91fd4dad
PR
3430 crypto_free_hash(mdev->tconn->csums_tfm);
3431 mdev->tconn->csums_tfm = csums_tfm;
3432 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3433 }
2ec91e0e 3434 rcu_assign_pointer(tconn->net_conf, new_net_conf);
b411b363 3435 }
daeda1cc 3436 }
91fd4dad 3437
813472ce
PR
3438 if (new_disk_conf) {
3439 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3440 put_ldev(mdev);
3441 }
3442
3443 if (new_plan) {
3444 old_plan = mdev->rs_plan_s;
3445 rcu_assign_pointer(mdev->rs_plan_s, new_plan);
b411b363 3446 }
daeda1cc
PR
3447
3448 mutex_unlock(&mdev->tconn->conf_update);
3449 synchronize_rcu();
3450 if (new_net_conf)
3451 kfree(old_net_conf);
3452 kfree(old_disk_conf);
813472ce 3453 kfree(old_plan);
daeda1cc 3454
82bc0194 3455 return 0;
b411b363 3456
813472ce
PR
3457reconnect:
3458 if (new_disk_conf) {
3459 put_ldev(mdev);
3460 kfree(new_disk_conf);
3461 }
3462 mutex_unlock(&mdev->tconn->conf_update);
3463 return -EIO;
3464
b411b363 3465disconnect:
813472ce
PR
3466 kfree(new_plan);
3467 if (new_disk_conf) {
3468 put_ldev(mdev);
3469 kfree(new_disk_conf);
3470 }
a0095508 3471 mutex_unlock(&mdev->tconn->conf_update);
b411b363
PR
3472 /* just for completeness: actually not needed,
3473 * as this is not reached if csums_tfm was ok. */
3474 crypto_free_hash(csums_tfm);
3475 /* but free the verify_tfm again, if csums_tfm did not work out */
3476 crypto_free_hash(verify_tfm);
38fa9988 3477 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3478 return -EIO;
b411b363
PR
3479}
3480
b411b363
PR
3481/* warn if the arguments differ by more than 12.5% */
3482static void warn_if_differ_considerably(struct drbd_conf *mdev,
3483 const char *s, sector_t a, sector_t b)
3484{
3485 sector_t d;
3486 if (a == 0 || b == 0)
3487 return;
3488 d = (a > b) ? (a - b) : (b - a);
3489 if (d > (a>>3) || d > (b>>3))
3490 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3491 (unsigned long long)a, (unsigned long long)b);
3492}
3493
4a76b161 3494static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3495{
4a76b161 3496 struct drbd_conf *mdev;
e658983a 3497 struct p_sizes *p = pi->data;
b411b363 3498 enum determine_dev_size dd = unchanged;
b411b363
PR
3499 sector_t p_size, p_usize, my_usize;
3500 int ldsc = 0; /* local disk size changed */
e89b591c 3501 enum dds_flags ddsf;
b411b363 3502
4a76b161
AG
3503 mdev = vnr_to_mdev(tconn, pi->vnr);
3504 if (!mdev)
3505 return config_unknown_volume(tconn, pi);
3506
b411b363
PR
3507 p_size = be64_to_cpu(p->d_size);
3508 p_usize = be64_to_cpu(p->u_size);
3509
b411b363
PR
3510 /* just store the peer's disk size for now.
3511 * we still need to figure out whether we accept that. */
3512 mdev->p_size = p_size;
3513
b411b363 3514 if (get_ldev(mdev)) {
daeda1cc
PR
3515 rcu_read_lock();
3516 my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
3517 rcu_read_unlock();
3518
b411b363
PR
3519 warn_if_differ_considerably(mdev, "lower level device sizes",
3520 p_size, drbd_get_max_capacity(mdev->ldev));
3521 warn_if_differ_considerably(mdev, "user requested size",
daeda1cc 3522 p_usize, my_usize);
b411b363
PR
3523
3524 /* if this is the first connect, or an otherwise expected
3525 * param exchange, choose the minimum */
3526 if (mdev->state.conn == C_WF_REPORT_PARAMS)
daeda1cc 3527 p_usize = min_not_zero(my_usize, p_usize);
b411b363
PR
3528
3529 /* Never shrink a device with usable data during connect.
3530 But allow online shrinking if we are connected. */
ef5e44a6 3531 if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
daeda1cc
PR
3532 drbd_get_capacity(mdev->this_bdev) &&
3533 mdev->state.disk >= D_OUTDATED &&
3534 mdev->state.conn < C_CONNECTED) {
b411b363 3535 dev_err(DEV, "The peer's disk size is too small!\n");
38fa9988 3536 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 3537 put_ldev(mdev);
82bc0194 3538 return -EIO;
b411b363 3539 }
daeda1cc
PR
3540
3541 if (my_usize != p_usize) {
3542 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3543
3544 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3545 if (!new_disk_conf) {
3546 dev_err(DEV, "Allocation of new disk_conf failed\n");
3547 put_ldev(mdev);
3548 return -ENOMEM;
3549 }
3550
3551 mutex_lock(&mdev->tconn->conf_update);
3552 old_disk_conf = mdev->ldev->disk_conf;
3553 *new_disk_conf = *old_disk_conf;
3554 new_disk_conf->disk_size = p_usize;
3555
3556 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3557 mutex_unlock(&mdev->tconn->conf_update);
3558 synchronize_rcu();
3559 kfree(old_disk_conf);
3560
3561 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
 3562 (unsigned long)p_usize);
3563 }
3564
b411b363
PR
3565 put_ldev(mdev);
3566 }
b411b363 3567
e89b591c 3568 ddsf = be16_to_cpu(p->dds_flags);
b411b363 3569 if (get_ldev(mdev)) {
24c4830c 3570 dd = drbd_determine_dev_size(mdev, ddsf);
b411b363
PR
3571 put_ldev(mdev);
3572 if (dd == dev_size_error)
82bc0194 3573 return -EIO;
b411b363
PR
3574 drbd_md_sync(mdev);
3575 } else {
3576 /* I am diskless, need to accept the peer's size. */
3577 drbd_set_my_capacity(mdev, p_size);
3578 }
3579
99432fcc
PR
3580 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3581 drbd_reconsider_max_bio_size(mdev);
3582
b411b363
PR
3583 if (get_ldev(mdev)) {
3584 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3585 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3586 ldsc = 1;
3587 }
3588
b411b363
PR
3589 put_ldev(mdev);
3590 }
3591
3592 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3593 if (be64_to_cpu(p->c_size) !=
3594 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3595 /* we have different sizes, probably peer
3596 * needs to know my new size... */
e89b591c 3597 drbd_send_sizes(mdev, 0, ddsf);
b411b363
PR
3598 }
3599 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3600 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3601 if (mdev->state.pdsk >= D_INCONSISTENT &&
e89b591c
PR
3602 mdev->state.disk >= D_INCONSISTENT) {
3603 if (ddsf & DDSF_NO_RESYNC)
3604 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3605 else
3606 resync_after_online_grow(mdev);
3607 } else
b411b363
PR
3608 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3609 }
3610 }
3611
82bc0194 3612 return 0;
b411b363
PR
3613}
3614
4a76b161 3615static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3616{
4a76b161 3617 struct drbd_conf *mdev;
e658983a 3618 struct p_uuids *p = pi->data;
b411b363 3619 u64 *p_uuid;
62b0da3a 3620 int i, updated_uuids = 0;
b411b363 3621
4a76b161
AG
3622 mdev = vnr_to_mdev(tconn, pi->vnr);
3623 if (!mdev)
3624 return config_unknown_volume(tconn, pi);
3625
b411b363
PR
3626 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3627
3628 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3629 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3630
3631 kfree(mdev->p_uuid);
3632 mdev->p_uuid = p_uuid;
3633
3634 if (mdev->state.conn < C_CONNECTED &&
3635 mdev->state.disk < D_INCONSISTENT &&
3636 mdev->state.role == R_PRIMARY &&
3637 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3638 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3639 (unsigned long long)mdev->ed_uuid);
38fa9988 3640 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3641 return -EIO;
b411b363
PR
3642 }
3643
3644 if (get_ldev(mdev)) {
3645 int skip_initial_sync =
3646 mdev->state.conn == C_CONNECTED &&
31890f4a 3647 mdev->tconn->agreed_pro_version >= 90 &&
b411b363
PR
3648 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3649 (p_uuid[UI_FLAGS] & 8);
3650 if (skip_initial_sync) {
3651 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3652 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
20ceb2b2
LE
3653 "clear_n_write from receive_uuids",
3654 BM_LOCKED_TEST_ALLOWED);
b411b363
PR
3655 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3656 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3657 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3658 CS_VERBOSE, NULL);
3659 drbd_md_sync(mdev);
62b0da3a 3660 updated_uuids = 1;
b411b363
PR
3661 }
3662 put_ldev(mdev);
18a50fa2
PR
3663 } else if (mdev->state.disk < D_INCONSISTENT &&
3664 mdev->state.role == R_PRIMARY) {
3665 /* I am a diskless primary, the peer just created a new current UUID
3666 for me. */
62b0da3a 3667 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
b411b363
PR
3668 }
3669
 3670 /* Before we test the disk state, we should wait until any possibly
 3671 ongoing cluster-wide state change has finished. That is important if
3672 we are primary and are detaching from our disk. We need to see the
3673 new disk state... */
8410da8f
PR
3674 mutex_lock(mdev->state_mutex);
3675 mutex_unlock(mdev->state_mutex);
b411b363 3676 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
62b0da3a
LE
3677 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3678
3679 if (updated_uuids)
3680 drbd_print_uuids(mdev, "receiver updated UUIDs to");
b411b363 3681
82bc0194 3682 return 0;
b411b363
PR
3683}
3684
3685/**
3686 * convert_state() - Converts the peer's view of the cluster state to our point of view
3687 * @ps: The state as seen by the peer.
3688 */
3689static union drbd_state convert_state(union drbd_state ps)
3690{
3691 union drbd_state ms;
3692
3693 static enum drbd_conns c_tab[] = {
369bea63 3694 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
b411b363
PR
3695 [C_CONNECTED] = C_CONNECTED,
3696
3697 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3698 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3699 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3700 [C_VERIFY_S] = C_VERIFY_T,
3701 [C_MASK] = C_MASK,
3702 };
3703
3704 ms.i = ps.i;
3705
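	/* mirror the view: the peer's role/disk become our peer/pdsk (and
	 * vice versa), connection states map to their local counterpart */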
3706 ms.conn = c_tab[ps.conn];
3707 ms.peer = ps.role;
3708 ms.role = ps.peer;
3709 ms.pdsk = ps.disk;
3710 ms.disk = ps.pdsk;
3711 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3712
3713 return ms;
3714}
3715
4a76b161 3716static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3717{
4a76b161 3718 struct drbd_conf *mdev;
e658983a 3719 struct p_req_state *p = pi->data;
b411b363 3720 union drbd_state mask, val;
bf885f8a 3721 enum drbd_state_rv rv;
b411b363 3722
4a76b161
AG
3723 mdev = vnr_to_mdev(tconn, pi->vnr);
3724 if (!mdev)
3725 return -EIO;
3726
b411b363
PR
3727 mask.i = be32_to_cpu(p->mask);
3728 val.i = be32_to_cpu(p->val);
3729
25703f83 3730 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
8410da8f 3731 mutex_is_locked(mdev->state_mutex)) {
b411b363 3732 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
82bc0194 3733 return 0;
b411b363
PR
3734 }
3735
3736 mask = convert_state(mask);
3737 val = convert_state(val);
3738
dfafcc8a
PR
3739 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3740 drbd_send_sr_reply(mdev, rv);
b411b363 3741
b411b363
PR
3742 drbd_md_sync(mdev);
3743
82bc0194 3744 return 0;
b411b363
PR
3745}
3746
e2857216 3747static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
dfafcc8a 3748{
e658983a 3749 struct p_req_state *p = pi->data;
dfafcc8a
PR
3750 union drbd_state mask, val;
3751 enum drbd_state_rv rv;
3752
3753 mask.i = be32_to_cpu(p->mask);
3754 val.i = be32_to_cpu(p->val);
3755
3756 if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
3757 mutex_is_locked(&tconn->cstate_mutex)) {
3758 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
82bc0194 3759 return 0;
dfafcc8a
PR
3760 }
3761
3762 mask = convert_state(mask);
3763 val = convert_state(val);
3764
778bcf2e 3765 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
dfafcc8a
PR
3766 conn_send_sr_reply(tconn, rv);
3767
82bc0194 3768 return 0;
dfafcc8a
PR
3769}
3770
4a76b161 3771static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3772{
4a76b161 3773 struct drbd_conf *mdev;
e658983a 3774 struct p_state *p = pi->data;
4ac4aada 3775 union drbd_state os, ns, peer_state;
b411b363 3776 enum drbd_disk_state real_peer_disk;
65d922c3 3777 enum chg_state_flags cs_flags;
b411b363
PR
3778 int rv;
3779
4a76b161
AG
3780 mdev = vnr_to_mdev(tconn, pi->vnr);
3781 if (!mdev)
3782 return config_unknown_volume(tconn, pi);
3783
b411b363
PR
3784 peer_state.i = be32_to_cpu(p->state);
3785
3786 real_peer_disk = peer_state.disk;
3787 if (peer_state.disk == D_NEGOTIATING) {
3788 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3789 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3790 }
3791
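	/* take a snapshot of our state; if it changes underneath us before
	 * the new state is committed below, start over */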
87eeee41 3792 spin_lock_irq(&mdev->tconn->req_lock);
b411b363 3793 retry:
78bae59b 3794 os = ns = drbd_read_state(mdev);
87eeee41 3795 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363 3796
b8853dbd
PR
3797 /* If some other part of the code (asender thread, timeout)
3798 * already decided to close the connection again,
3799 * we must not "re-establish" it here. */
3800 if (os.conn <= C_TEAR_DOWN)
3801 return false;
3802
9bcd2521
PR
3803 /* If this is the "end of sync" confirmation, usually the peer disk
3804 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3805 * set) resync started in PausedSyncT, or if the timing of pause-/
3806 * unpause-sync events has been "just right", the peer disk may
3807 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3808 */
3809 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3810 real_peer_disk == D_UP_TO_DATE &&
e9ef7bb6
LE
3811 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3812 /* If we are (becoming) SyncSource, but peer is still in sync
3813 * preparation, ignore its uptodate-ness to avoid flapping, it
3814 * will change to inconsistent once the peer reaches active
3815 * syncing states.
3816 * It may have changed syncer-paused flags, however, so we
3817 * cannot ignore this completely. */
3818 if (peer_state.conn > C_CONNECTED &&
3819 peer_state.conn < C_SYNC_SOURCE)
3820 real_peer_disk = D_INCONSISTENT;
3821
3822 /* if peer_state changes to connected at the same time,
3823 * it explicitly notifies us that it finished resync.
3824 * Maybe we should finish it up, too? */
3825 else if (os.conn >= C_SYNC_SOURCE &&
3826 peer_state.conn == C_CONNECTED) {
3827 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3828 drbd_resync_finished(mdev);
82bc0194 3829 return 0;
e9ef7bb6
LE
3830 }
3831 }
3832
3833 /* peer says his disk is inconsistent, while we think it is uptodate,
3834 * and this happens while the peer still thinks we have a sync going on,
3835 * but we think we are already done with the sync.
3836 * We ignore this to avoid flapping pdsk.
3837 * This should not happen, if the peer is a recent version of drbd. */
3838 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3839 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3840 real_peer_disk = D_UP_TO_DATE;
3841
4ac4aada
LE
3842 if (ns.conn == C_WF_REPORT_PARAMS)
3843 ns.conn = C_CONNECTED;
b411b363 3844
67531718
PR
3845 if (peer_state.conn == C_AHEAD)
3846 ns.conn = C_BEHIND;
3847
b411b363
PR
3848 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3849 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3850 int cr; /* consider resync */
3851
3852 /* if we established a new connection */
4ac4aada 3853 cr = (os.conn < C_CONNECTED);
b411b363
PR
3854 /* if we had an established connection
3855 * and one of the nodes newly attaches a disk */
4ac4aada 3856 cr |= (os.conn == C_CONNECTED &&
b411b363 3857 (peer_state.disk == D_NEGOTIATING ||
4ac4aada 3858 os.disk == D_NEGOTIATING));
b411b363
PR
3859 /* if we have both been inconsistent, and the peer has been
3860 * forced to be UpToDate with --overwrite-data */
3861 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3862 /* if we had been plain connected, and the admin requested to
3863 * start a sync by "invalidate" or "invalidate-remote" */
4ac4aada 3864 cr |= (os.conn == C_CONNECTED &&
b411b363
PR
3865 (peer_state.conn >= C_STARTING_SYNC_S &&
3866 peer_state.conn <= C_WF_BITMAP_T));
3867
3868 if (cr)
4ac4aada 3869 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
b411b363
PR
3870
3871 put_ldev(mdev);
4ac4aada
LE
3872 if (ns.conn == C_MASK) {
3873 ns.conn = C_CONNECTED;
b411b363 3874 if (mdev->state.disk == D_NEGOTIATING) {
82f59cc6 3875 drbd_force_state(mdev, NS(disk, D_FAILED));
b411b363
PR
3876 } else if (peer_state.disk == D_NEGOTIATING) {
3877 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3878 peer_state.disk = D_DISKLESS;
580b9767 3879 real_peer_disk = D_DISKLESS;
b411b363 3880 } else {
8169e41b 3881 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
82bc0194 3882 return -EIO;
4ac4aada 3883 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
38fa9988 3884 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3885 return -EIO;
b411b363
PR
3886 }
3887 }
3888 }
3889
87eeee41 3890 spin_lock_irq(&mdev->tconn->req_lock);
78bae59b 3891 if (os.i != drbd_read_state(mdev).i)
b411b363
PR
3892 goto retry;
3893 clear_bit(CONSIDER_RESYNC, &mdev->flags);
b411b363
PR
3894 ns.peer = peer_state.role;
3895 ns.pdsk = real_peer_disk;
3896 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
4ac4aada 3897 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
b411b363 3898 ns.disk = mdev->new_state_tmp.disk;
4ac4aada 3899 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
2aebfabb 3900 if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
481c6f50 3901 test_bit(NEW_CUR_UUID, &mdev->flags)) {
8554df1c 3902 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
481c6f50 3903 for temporary network outages! */
87eeee41 3904 spin_unlock_irq(&mdev->tconn->req_lock);
481c6f50 3905 dev_err(DEV, "Aborting Connect, cannot thaw IO with a peer that is only Consistent\n");
2f5cdd0b 3906 tl_clear(mdev->tconn);
481c6f50
PR
3907 drbd_uuid_new_current(mdev);
3908 clear_bit(NEW_CUR_UUID, &mdev->flags);
38fa9988 3909 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
82bc0194 3910 return -EIO;
481c6f50 3911 }
65d922c3 3912 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
78bae59b 3913 ns = drbd_read_state(mdev);
87eeee41 3914 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
3915
3916 if (rv < SS_SUCCESS) {
38fa9988 3917 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
82bc0194 3918 return -EIO;
b411b363
PR
3919 }
3920
4ac4aada
LE
3921 if (os.conn > C_WF_REPORT_PARAMS) {
3922 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
b411b363
PR
3923 peer_state.disk != D_NEGOTIATING ) {
3924 /* we want resync, peer has not yet decided to sync... */
3925 /* Nowadays only used when forcing a node into primary role and
3926 setting its disk to UpToDate with that */
3927 drbd_send_uuids(mdev);
43de7c85 3928 drbd_send_current_state(mdev);
b411b363
PR
3929 }
3930 }
3931
08b165ba 3932 clear_bit(DISCARD_MY_DATA, &mdev->flags);
b411b363
PR
3933
3934 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3935
82bc0194 3936 return 0;
b411b363
PR
3937}
3938
4a76b161 3939static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 3940{
4a76b161 3941 struct drbd_conf *mdev;
e658983a 3942 struct p_rs_uuid *p = pi->data;
4a76b161
AG
3943
3944 mdev = vnr_to_mdev(tconn, pi->vnr);
3945 if (!mdev)
3946 return -EIO;
b411b363
PR
3947
3948 wait_event(mdev->misc_wait,
3949 mdev->state.conn == C_WF_SYNC_UUID ||
c4752ef1 3950 mdev->state.conn == C_BEHIND ||
b411b363
PR
3951 mdev->state.conn < C_CONNECTED ||
3952 mdev->state.disk < D_NEGOTIATING);
3953
3954 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3955
b411b363
PR
3956 /* Here the _drbd_uuid_ functions are right, current should
3957 _not_ be rotated into the history */
3958 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3959 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3960 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3961
62b0da3a 3962 drbd_print_uuids(mdev, "updated sync uuid");
b411b363
PR
3963 drbd_start_resync(mdev, C_SYNC_TARGET);
3964
3965 put_ldev(mdev);
3966 } else
3967 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3968
82bc0194 3969 return 0;
b411b363
PR
3970}
3971
2c46407d
AG
3972/**
3973 * receive_bitmap_plain
3974 *
3975 * Return 0 when done, 1 when another iteration is needed, and a negative error
3976 * code upon failure.
3977 */
3978static int
50d0b1ad 3979receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size,
e658983a 3980 unsigned long *p, struct bm_xfer_ctx *c)
b411b363 3981{
50d0b1ad
AG
3982 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
3983 drbd_header_size(mdev->tconn);
e658983a 3984 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
50d0b1ad 3985 c->bm_words - c->word_offset);
e658983a 3986 unsigned int want = num_words * sizeof(*p);
2c46407d 3987 int err;
b411b363 3988
50d0b1ad
AG
3989 if (want != size) {
3990 dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
2c46407d 3991 return -EIO;
b411b363
PR
3992 }
3993 if (want == 0)
2c46407d 3994 return 0;
e658983a 3995 err = drbd_recv_all(mdev->tconn, p, want);
82bc0194 3996 if (err)
2c46407d 3997 return err;
b411b363 3998
e658983a 3999 drbd_bm_merge_lel(mdev, c->word_offset, num_words, p);
b411b363
PR
4000
4001 c->word_offset += num_words;
4002 c->bit_offset = c->word_offset * BITS_PER_LONG;
4003 if (c->bit_offset > c->bm_bits)
4004 c->bit_offset = c->bm_bits;
4005
2c46407d 4006 return 1;
b411b363
PR
4007}
4008
a02d1240
AG
4009static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4010{
4011 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4012}
4013
4014static int dcbp_get_start(struct p_compressed_bm *p)
4015{
4016 return (p->encoding & 0x80) != 0;
4017}
4018
4019static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4020{
4021 return (p->encoding >> 4) & 0x7;
4022}
4023
2c46407d
AG
4024/**
4025 * recv_bm_rle_bits
4026 *
4027 * Return 0 when done, 1 when another iteration is needed, and a negative error
4028 * code upon failure.
4029 */
4030static int
b411b363
PR
4031recv_bm_rle_bits(struct drbd_conf *mdev,
4032 struct p_compressed_bm *p,
c6d25cfe
PR
4033 struct bm_xfer_ctx *c,
4034 unsigned int len)
b411b363
PR
4035{
4036 struct bitstream bs;
4037 u64 look_ahead;
4038 u64 rl;
4039 u64 tmp;
4040 unsigned long s = c->bit_offset;
4041 unsigned long e;
a02d1240 4042 int toggle = dcbp_get_start(p);
b411b363
PR
4043 int have;
4044 int bits;
4045
a02d1240 4046 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
b411b363
PR
4047
4048 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4049 if (bits < 0)
2c46407d 4050 return -EIO;
b411b363
PR
4051
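	/* decode alternating runs of clear and set bits: each VLI-encoded
	 * run length advances the bit offset, and only runs with "toggle"
	 * set actually mark bits in the bitmap */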
4052 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4053 bits = vli_decode_bits(&rl, look_ahead);
4054 if (bits <= 0)
2c46407d 4055 return -EIO;
b411b363
PR
4056
4057 if (toggle) {
4058 e = s + rl -1;
4059 if (e >= c->bm_bits) {
4060 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
2c46407d 4061 return -EIO;
b411b363
PR
4062 }
4063 _drbd_bm_set_bits(mdev, s, e);
4064 }
4065
4066 if (have < bits) {
4067 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
4068 have, bits, look_ahead,
4069 (unsigned int)(bs.cur.b - p->code),
4070 (unsigned int)bs.buf_len);
2c46407d 4071 return -EIO;
b411b363
PR
4072 }
4073 look_ahead >>= bits;
4074 have -= bits;
4075
4076 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4077 if (bits < 0)
2c46407d 4078 return -EIO;
b411b363
PR
4079 look_ahead |= tmp << have;
4080 have += bits;
4081 }
4082
4083 c->bit_offset = s;
4084 bm_xfer_ctx_bit_to_word_offset(c);
4085
2c46407d 4086 return (s != c->bm_bits);
b411b363
PR
4087}
4088
2c46407d
AG
4089/**
4090 * decode_bitmap_c
4091 *
4092 * Return 0 when done, 1 when another iteration is needed, and a negative error
4093 * code upon failure.
4094 */
4095static int
b411b363
PR
4096decode_bitmap_c(struct drbd_conf *mdev,
4097 struct p_compressed_bm *p,
c6d25cfe
PR
4098 struct bm_xfer_ctx *c,
4099 unsigned int len)
b411b363 4100{
a02d1240 4101 if (dcbp_get_code(p) == RLE_VLI_Bits)
e658983a 4102 return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p));
b411b363
PR
4103
4104 /* other variants had been implemented for evaluation,
4105 * but have been dropped as this one turned out to be "best"
4106 * during all our tests. */
4107
4108 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
38fa9988 4109 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
2c46407d 4110 return -EIO;
b411b363
PR
4111}
4112
4113void INFO_bm_xfer_stats(struct drbd_conf *mdev,
4114 const char *direction, struct bm_xfer_ctx *c)
4115{
4116 /* what would it take to transfer it "plaintext" */
50d0b1ad
AG
4117 unsigned int header_size = drbd_header_size(mdev->tconn);
4118 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4119 unsigned int plain =
4120 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4121 c->bm_words * sizeof(unsigned long);
4122 unsigned int total = c->bytes[0] + c->bytes[1];
4123 unsigned int r;
b411b363
PR
4124
 4125 /* total cannot be zero, but just in case: */
4126 if (total == 0)
4127 return;
4128
4129 /* don't report if not compressed */
4130 if (total >= plain)
4131 return;
4132
4133 /* total < plain. check for overflow, still */
4134 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4135 : (1000 * total / plain);
4136
4137 if (r > 1000)
4138 r = 1000;
4139
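	/* r holds total/plain in per mille; invert it to report the savings */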
4140 r = 1000 - r;
4141 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4142 "total %u; compression: %u.%u%%\n",
4143 direction,
4144 c->bytes[1], c->packets[1],
4145 c->bytes[0], c->packets[0],
4146 total, r/10, r % 10);
4147}
4148
4149/* Since we are processing the bitfield from lower addresses to higher,
 4150 it does not matter whether we process it in 32 bit or 64 bit
 4151 chunks, as long as it is little endian. (Understand it as a byte stream,
 4152 beginning with the lowest byte...) If we used big endian
4153 we would need to process it from the highest address to the lowest,
4154 in order to be agnostic to the 32 vs 64 bits issue.
4155
 4156 returns 0 on success, a negative error code otherwise. */
4a76b161 4157static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4158{
4a76b161 4159 struct drbd_conf *mdev;
b411b363 4160 struct bm_xfer_ctx c;
2c46407d 4161 int err;
4a76b161
AG
4162
4163 mdev = vnr_to_mdev(tconn, pi->vnr);
4164 if (!mdev)
4165 return -EIO;
b411b363 4166
20ceb2b2
LE
4167 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
4168 /* you are supposed to send additional out-of-sync information
4169 * if you actually set bits during this phase */
b411b363 4170
b411b363
PR
4171 c = (struct bm_xfer_ctx) {
4172 .bm_bits = drbd_bm_bits(mdev),
4173 .bm_words = drbd_bm_words(mdev),
4174 };
4175
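	/* keep receiving bitmap packets, plain or RLE-compressed, until the
	 * transfer is complete or an error occurs */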
2c46407d 4176 for(;;) {
e658983a
AG
4177 if (pi->cmd == P_BITMAP)
4178 err = receive_bitmap_plain(mdev, pi->size, pi->data, &c);
4179 else if (pi->cmd == P_COMPRESSED_BITMAP) {
b411b363
PR
4180 /* MAYBE: sanity check that we speak proto >= 90,
4181 * and the feature is enabled! */
e658983a 4182 struct p_compressed_bm *p = pi->data;
b411b363 4183
50d0b1ad 4184 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) {
b411b363 4185 dev_err(DEV, "ReportCBitmap packet too large\n");
82bc0194 4186 err = -EIO;
b411b363
PR
4187 goto out;
4188 }
e658983a 4189 if (pi->size <= sizeof(*p)) {
e2857216 4190 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
82bc0194 4191 err = -EIO;
78fcbdae 4192 goto out;
b411b363 4193 }
e658983a
AG
4194 err = drbd_recv_all(mdev->tconn, p, pi->size);
4195 if (err)
4196 goto out;
e2857216 4197 err = decode_bitmap_c(mdev, p, &c, pi->size);
b411b363 4198 } else {
e2857216 4199 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
82bc0194 4200 err = -EIO;
b411b363
PR
4201 goto out;
4202 }
4203
e2857216 4204 c.packets[pi->cmd == P_BITMAP]++;
50d0b1ad 4205 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size;
b411b363 4206
2c46407d
AG
4207 if (err <= 0) {
4208 if (err < 0)
4209 goto out;
b411b363 4210 break;
2c46407d 4211 }
e2857216 4212 err = drbd_recv_header(mdev->tconn, pi);
82bc0194 4213 if (err)
b411b363 4214 goto out;
2c46407d 4215 }
b411b363
PR
4216
4217 INFO_bm_xfer_stats(mdev, "receive", &c);
4218
4219 if (mdev->state.conn == C_WF_BITMAP_T) {
de1f8e4a
AG
4220 enum drbd_state_rv rv;
4221
82bc0194
AG
4222 err = drbd_send_bitmap(mdev);
4223 if (err)
b411b363
PR
4224 goto out;
4225 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
de1f8e4a
AG
4226 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
4227 D_ASSERT(rv == SS_SUCCESS);
b411b363
PR
4228 } else if (mdev->state.conn != C_WF_BITMAP_S) {
4229 /* admin may have requested C_DISCONNECTING,
4230 * other threads may have noticed network errors */
4231 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
4232 drbd_conn_str(mdev->state.conn));
4233 }
82bc0194 4234 err = 0;
b411b363 4235
b411b363 4236 out:
20ceb2b2 4237 drbd_bm_unlock(mdev);
82bc0194 4238 if (!err && mdev->state.conn == C_WF_BITMAP_S)
b411b363 4239 drbd_start_resync(mdev, C_SYNC_SOURCE);
82bc0194 4240 return err;
b411b363
PR
4241}
4242
4a76b161 4243static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4244{
4a76b161 4245 conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
e2857216 4246 pi->cmd, pi->size);
2de876ef 4247
4a76b161 4248 return ignore_remaining_packet(tconn, pi);
2de876ef
PR
4249}
4250
4a76b161 4251static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
0ced55a3 4252{
e7f52dfb
LE
4253 /* Make sure we've acked all the TCP data associated
4254 * with the data requests being unplugged */
4a76b161 4255 drbd_tcp_quickack(tconn->data.socket);
0ced55a3 4256
82bc0194 4257 return 0;
0ced55a3
PR
4258}
4259
4a76b161 4260static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
73a01a18 4261{
4a76b161 4262 struct drbd_conf *mdev;
e658983a 4263 struct p_block_desc *p = pi->data;
4a76b161
AG
4264
4265 mdev = vnr_to_mdev(tconn, pi->vnr);
4266 if (!mdev)
4267 return -EIO;
73a01a18 4268
f735e363
LE
4269 switch (mdev->state.conn) {
4270 case C_WF_SYNC_UUID:
4271 case C_WF_BITMAP_T:
4272 case C_BEHIND:
4273 break;
4274 default:
4275 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4276 drbd_conn_str(mdev->state.conn));
4277 }
4278
73a01a18
PR
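	/* mark the reported range as out of sync so it gets resynced later */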
4279 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
4280
82bc0194 4281 return 0;
73a01a18
PR
4282}
4283
02918be2
PR
4284struct data_cmd {
4285 int expect_payload;
4286 size_t pkt_size;
4a76b161 4287 int (*fn)(struct drbd_tconn *, struct packet_info *);
02918be2
PR
4288};
4289
4290static struct data_cmd drbd_cmd_handler[] = {
4a76b161
AG
4291 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4292 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4293 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4294 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
e658983a
AG
4295 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4296 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4297 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
4a76b161
AG
4298 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4299 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
e658983a
AG
4300 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4301 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
4a76b161
AG
4302 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4303 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4304 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4305 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4306 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4307 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4308 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4309 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4310 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4311 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
4312 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
4313 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
036b17ea 4314 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
b411b363
PR
4315};
4316
eefc2f7d 4317static void drbdd(struct drbd_tconn *tconn)
b411b363 4318{
77351055 4319 struct packet_info pi;
02918be2 4320 size_t shs; /* sub header size */
82bc0194 4321 int err;
b411b363 4322
eefc2f7d 4323 while (get_t_state(&tconn->receiver) == RUNNING) {
deebe195
AG
4324 struct data_cmd *cmd;
4325
eefc2f7d 4326 drbd_thread_current_set_cpu(&tconn->receiver);
69bc7bc3 4327 if (drbd_recv_header(tconn, &pi))
02918be2 4328 goto err_out;
b411b363 4329
deebe195 4330 cmd = &drbd_cmd_handler[pi.cmd];
4a76b161 4331 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
2fcb8f30
AG
4332 conn_err(tconn, "Unexpected data packet %s (0x%04x)",
4333 cmdname(pi.cmd), pi.cmd);
02918be2 4334 goto err_out;
0b33a916 4335 }
b411b363 4336
e658983a
AG
4337 shs = cmd->pkt_size;
4338 if (pi.size > shs && !cmd->expect_payload) {
2fcb8f30
AG
4339 conn_err(tconn, "No payload expected %s l:%d\n",
4340 cmdname(pi.cmd), pi.size);
02918be2 4341 goto err_out;
b411b363 4342 }
b411b363 4343
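		/* read the fixed-size part of the packet into the prepared
		 * receive buffer; pi.size is reduced to the remaining payload */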
c13f7e1a 4344 if (shs) {
e658983a 4345 err = drbd_recv_all_warn(tconn, pi.data, shs);
a5c31904 4346 if (err)
c13f7e1a 4347 goto err_out;
e2857216 4348 pi.size -= shs;
c13f7e1a
LE
4349 }
4350
4a76b161
AG
4351 err = cmd->fn(tconn, &pi);
4352 if (err) {
9f5bdc33
AG
4353 conn_err(tconn, "error receiving %s, e: %d l: %d!\n",
4354 cmdname(pi.cmd), err, pi.size);
02918be2 4355 goto err_out;
b411b363
PR
4356 }
4357 }
82bc0194 4358 return;
b411b363 4359
82bc0194
AG
4360 err_out:
4361 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
b411b363
PR
4362}
4363
0e29d163 4364void conn_flush_workqueue(struct drbd_tconn *tconn)
b411b363
PR
4365{
4366 struct drbd_wq_barrier barr;
4367
4368 barr.w.cb = w_prev_work_done;
0e29d163 4369 barr.w.tconn = tconn;
b411b363 4370 init_completion(&barr.done);
0e29d163 4371 drbd_queue_work(&tconn->data.work, &barr.w);
b411b363
PR
4372 wait_for_completion(&barr.done);
4373}
4374
81fa2e67 4375static void conn_disconnect(struct drbd_tconn *tconn)
b411b363 4376{
c141ebda 4377 struct drbd_conf *mdev;
bbeb641c 4378 enum drbd_conns oc;
376694a0 4379 int vnr;
b411b363 4380
bbeb641c 4381 if (tconn->cstate == C_STANDALONE)
b411b363 4382 return;
b411b363 4383
b8853dbd
PR
4384 /* We are about to start the cleanup after connection loss.
4385 * Make sure drbd_make_request knows about that.
4386 * Usually we should be in some network failure state already,
4387 * but just in case we are not, we fix it up here.
4388 */
4389 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
4390
b411b363 4391 /* asender does not clean up anything. it must not interfere, either */
360cc740
PR
4392 drbd_thread_stop(&tconn->asender);
4393 drbd_free_sock(tconn);
4394
c141ebda
PR
4395 rcu_read_lock();
4396 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
4397 kref_get(&mdev->kref);
4398 rcu_read_unlock();
4399 drbd_disconnected(mdev);
4400 kref_put(&mdev->kref, &drbd_minor_destroy);
4401 rcu_read_lock();
4402 }
4403 rcu_read_unlock();
4404
12038a3a
PR
4405 if (!list_empty(&tconn->current_epoch->list))
4406 conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n");
4407 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4408 atomic_set(&tconn->current_epoch->epoch_size, 0);
4409
360cc740
PR
4410 conn_info(tconn, "Connection closed\n");
4411
cb703454
PR
4412 if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
4413 conn_try_outdate_peer_async(tconn);
4414
360cc740 4415 spin_lock_irq(&tconn->req_lock);
bbeb641c
PR
4416 oc = tconn->cstate;
4417 if (oc >= C_UNCONNECTED)
376694a0 4418 _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
bbeb641c 4419
360cc740
PR
4420 spin_unlock_irq(&tconn->req_lock);
4421
f3dfa40a 4422 if (oc == C_DISCONNECTING)
d9cc6e23 4423 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
360cc740
PR
4424}
4425
c141ebda 4426static int drbd_disconnected(struct drbd_conf *mdev)
360cc740 4427{
360cc740 4428 unsigned int i;
b411b363 4429
85719573 4430 /* wait for current activity to cease. */
87eeee41 4431 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
4432 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4433 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4434 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
87eeee41 4435 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
4436
4437 /* We do not have data structures that would allow us to
4438 * get the rs_pending_cnt down to 0 again.
4439 * * On C_SYNC_TARGET we do not have any data structures describing
4440 * the pending RSDataRequest's we have sent.
4441 * * On C_SYNC_SOURCE there is no data structure that tracks
4442 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4443 * And no, it is not the sum of the reference counts in the
4444 * resync_LRU. The resync_LRU tracks the whole operation including
4445 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4446 * on the fly. */
4447 drbd_rs_cancel_all(mdev);
4448 mdev->rs_total = 0;
4449 mdev->rs_failed = 0;
4450 atomic_set(&mdev->rs_pending_cnt, 0);
4451 wake_up(&mdev->misc_wait);
4452
b411b363 4453 del_timer_sync(&mdev->resync_timer);
b411b363
PR
4454 resync_timer_fn((unsigned long)mdev);
4455
b411b363
PR
4456 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4457 * w_make_resync_request etc. which may still be on the worker queue
4458 * to be "canceled" */
a21e9298 4459 drbd_flush_workqueue(mdev);
b411b363 4460
a990be46 4461 drbd_finish_peer_reqs(mdev);
b411b363 4462
d10b4ea3
PR
4463 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
 4464 might have queued work again. The one before drbd_finish_peer_reqs() is
 4465 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
4466 drbd_flush_workqueue(mdev);
4467
b411b363
PR
4468 kfree(mdev->p_uuid);
4469 mdev->p_uuid = NULL;
4470
2aebfabb 4471 if (!drbd_suspended(mdev))
2f5cdd0b 4472 tl_clear(mdev->tconn);
b411b363 4473
b411b363
PR
4474 drbd_md_sync(mdev);
4475
20ceb2b2
LE
4476 /* serialize with bitmap writeout triggered by the state change,
4477 * if any. */
4478 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4479
b411b363
PR
4480 /* tcp_close and release of sendpage pages can be deferred. I don't
4481 * want to use SO_LINGER, because apparently it can be deferred for
4482 * more than 20 seconds (longest time I checked).
4483 *
 4484 * Actually we don't care exactly when the network stack does its
4485 * put_page(), but release our reference on these pages right here.
4486 */
7721f567 4487 i = drbd_free_peer_reqs(mdev, &mdev->net_ee);
b411b363
PR
4488 if (i)
4489 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
435f0740
LE
4490 i = atomic_read(&mdev->pp_in_use_by_net);
4491 if (i)
4492 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
b411b363
PR
4493 i = atomic_read(&mdev->pp_in_use);
4494 if (i)
45bb912b 4495 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
b411b363
PR
4496
4497 D_ASSERT(list_empty(&mdev->read_ee));
4498 D_ASSERT(list_empty(&mdev->active_ee));
4499 D_ASSERT(list_empty(&mdev->sync_ee));
4500 D_ASSERT(list_empty(&mdev->done_ee));
4501
360cc740 4502 return 0;
b411b363
PR
4503}
4504
4505/*
4506 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4507 * we can agree on is stored in agreed_pro_version.
4508 *
4509 * feature flags and the reserved array should be enough room for future
4510 * enhancements of the handshake protocol, and possible plugins...
4511 *
4512 * for now, they are expected to be zero, but ignored.
4513 */
6038178e 4514static int drbd_send_features(struct drbd_tconn *tconn)
b411b363 4515{
9f5bdc33
AG
4516 struct drbd_socket *sock;
4517 struct p_connection_features *p;
b411b363 4518
9f5bdc33
AG
4519 sock = &tconn->data;
4520 p = conn_prepare_command(tconn, sock);
4521 if (!p)
e8d17b01 4522 return -EIO;
b411b363
PR
4523 memset(p, 0, sizeof(*p));
4524 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4525 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
9f5bdc33 4526 return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
b411b363
PR
4527}
4528
4529/*
4530 * return values:
4531 * 1 yes, we have a valid connection
4532 * 0 oops, did not work out, please try again
4533 * -1 peer talks different language,
4534 * no point in trying again, please go standalone.
4535 */
6038178e 4536static int drbd_do_features(struct drbd_tconn *tconn)
b411b363 4537{
65d11ed6 4538 /* ASSERT current == tconn->receiver ... */
e658983a
AG
4539 struct p_connection_features *p;
4540 const int expect = sizeof(struct p_connection_features);
77351055 4541 struct packet_info pi;
a5c31904 4542 int err;
b411b363 4543
6038178e 4544 err = drbd_send_features(tconn);
e8d17b01 4545 if (err)
b411b363
PR
4546 return 0;
4547
69bc7bc3
AG
4548 err = drbd_recv_header(tconn, &pi);
4549 if (err)
b411b363
PR
4550 return 0;
4551
6038178e
AG
4552 if (pi.cmd != P_CONNECTION_FEATURES) {
4553 conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
2fcb8f30 4554 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4555 return -1;
4556 }
4557
77351055 4558 if (pi.size != expect) {
6038178e 4559 conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
77351055 4560 expect, pi.size);
b411b363
PR
4561 return -1;
4562 }
4563
e658983a
AG
4564 p = pi.data;
4565 err = drbd_recv_all_warn(tconn, p, expect);
a5c31904 4566 if (err)
b411b363 4567 return 0;
b411b363 4568
b411b363
PR
4569 p->protocol_min = be32_to_cpu(p->protocol_min);
4570 p->protocol_max = be32_to_cpu(p->protocol_max);
4571 if (p->protocol_max == 0)
4572 p->protocol_max = p->protocol_min;
4573
4574 if (PRO_VERSION_MAX < p->protocol_min ||
4575 PRO_VERSION_MIN > p->protocol_max)
4576 goto incompat;
4577
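	/* use the highest protocol version supported by both sides */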
65d11ed6 4578 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
b411b363 4579
65d11ed6
PR
4580 conn_info(tconn, "Handshake successful: "
4581 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
b411b363
PR
4582
4583 return 1;
4584
4585 incompat:
65d11ed6 4586 conn_err(tconn, "incompatible DRBD dialects: "
b411b363
PR
4587 "I support %d-%d, peer supports %d-%d\n",
4588 PRO_VERSION_MIN, PRO_VERSION_MAX,
4589 p->protocol_min, p->protocol_max);
4590 return -1;
4591}
4592
4593#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
13e6037d 4594static int drbd_do_auth(struct drbd_tconn *tconn)
b411b363
PR
4595{
 4596 dev_err(DEV, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4597 dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
b10d96cb 4598 return -1;
b411b363
PR
4599}
4600#else
4601#define CHALLENGE_LEN 64
b10d96cb
JT
4602
4603/* Return value:
4604 1 - auth succeeded,
4605 0 - failed, try again (network error),
4606 -1 - auth failed, don't try again.
4607*/
4608
13e6037d 4609static int drbd_do_auth(struct drbd_tconn *tconn)
b411b363 4610{
9f5bdc33 4611 struct drbd_socket *sock;
b411b363
PR
4612 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4613 struct scatterlist sg;
4614 char *response = NULL;
4615 char *right_response = NULL;
4616 char *peers_ch = NULL;
44ed167d
PR
4617 unsigned int key_len;
4618 char secret[SHARED_SECRET_MAX]; /* 64 byte */
b411b363
PR
4619 unsigned int resp_size;
4620 struct hash_desc desc;
77351055 4621 struct packet_info pi;
44ed167d 4622 struct net_conf *nc;
69bc7bc3 4623 int err, rv;
b411b363 4624
9f5bdc33
AG
4625 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4626
44ed167d
PR
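	/* challenge-response authentication: send our random challenge,
	 * answer the peer's challenge with an HMAC keyed by the shared
	 * secret, and verify the peer's response against our own challenge */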
4627 rcu_read_lock();
4628 nc = rcu_dereference(tconn->net_conf);
4629 key_len = strlen(nc->shared_secret);
4630 memcpy(secret, nc->shared_secret, key_len);
4631 rcu_read_unlock();
4632
13e6037d 4633 desc.tfm = tconn->cram_hmac_tfm;
b411b363
PR
4634 desc.flags = 0;
4635
44ed167d 4636 rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len);
b411b363 4637 if (rv) {
13e6037d 4638 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
b10d96cb 4639 rv = -1;
b411b363
PR
4640 goto fail;
4641 }
4642
4643 get_random_bytes(my_challenge, CHALLENGE_LEN);
4644
9f5bdc33
AG
4645 sock = &tconn->data;
4646 if (!conn_prepare_command(tconn, sock)) {
4647 rv = 0;
4648 goto fail;
4649 }
e658983a 4650 rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0,
9f5bdc33 4651 my_challenge, CHALLENGE_LEN);
b411b363
PR
4652 if (!rv)
4653 goto fail;
4654
69bc7bc3
AG
4655 err = drbd_recv_header(tconn, &pi);
4656 if (err) {
4657 rv = 0;
b411b363 4658 goto fail;
69bc7bc3 4659 }
b411b363 4660
77351055 4661 if (pi.cmd != P_AUTH_CHALLENGE) {
13e6037d 4662 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
2fcb8f30 4663 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4664 rv = 0;
4665 goto fail;
4666 }
4667
77351055 4668 if (pi.size > CHALLENGE_LEN * 2) {
13e6037d 4669 conn_err(tconn, "expected AuthChallenge payload too big.\n");
b10d96cb 4670 rv = -1;
b411b363
PR
4671 goto fail;
4672 }
4673
77351055 4674 peers_ch = kmalloc(pi.size, GFP_NOIO);
b411b363 4675 if (peers_ch == NULL) {
13e6037d 4676 conn_err(tconn, "kmalloc of peers_ch failed\n");
b10d96cb 4677 rv = -1;
b411b363
PR
4678 goto fail;
4679 }
4680
a5c31904
AG
4681 err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4682 if (err) {
b411b363
PR
4683 rv = 0;
4684 goto fail;
4685 }
4686
13e6037d 4687 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
b411b363
PR
4688 response = kmalloc(resp_size, GFP_NOIO);
4689 if (response == NULL) {
13e6037d 4690 conn_err(tconn, "kmalloc of response failed\n");
b10d96cb 4691 rv = -1;
b411b363
PR
4692 goto fail;
4693 }
4694
4695 sg_init_table(&sg, 1);
77351055 4696 sg_set_buf(&sg, peers_ch, pi.size);
b411b363
PR
4697
4698 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4699 if (rv) {
13e6037d 4700 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4701 rv = -1;
b411b363
PR
4702 goto fail;
4703 }
4704
9f5bdc33
AG
4705 if (!conn_prepare_command(tconn, sock)) {
4706 rv = 0;
4707 goto fail;
4708 }
e658983a 4709 rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0,
9f5bdc33 4710 response, resp_size);
b411b363
PR
4711 if (!rv)
4712 goto fail;
4713
69bc7bc3
AG
4714 err = drbd_recv_header(tconn, &pi);
4715 if (err) {
4716 rv = 0;
b411b363 4717 goto fail;
69bc7bc3 4718 }
b411b363 4719
77351055 4720 if (pi.cmd != P_AUTH_RESPONSE) {
13e6037d 4721 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
2fcb8f30 4722 cmdname(pi.cmd), pi.cmd);
b411b363
PR
4723 rv = 0;
4724 goto fail;
4725 }
4726
77351055 4727 if (pi.size != resp_size) {
13e6037d 4728 conn_err(tconn, "expected AuthResponse payload of wrong size\n");
b411b363
PR
4729 rv = 0;
4730 goto fail;
4731 }
4732
a5c31904
AG
4733 err = drbd_recv_all_warn(tconn, response , resp_size);
4734 if (err) {
b411b363
PR
4735 rv = 0;
4736 goto fail;
4737 }
4738
4739 right_response = kmalloc(resp_size, GFP_NOIO);
2d1ee87d 4740 if (right_response == NULL) {
13e6037d 4741 conn_err(tconn, "kmalloc of right_response failed\n");
b10d96cb 4742 rv = -1;
b411b363
PR
4743 goto fail;
4744 }
4745
4746 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4747
4748 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4749 if (rv) {
13e6037d 4750 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
b10d96cb 4751 rv = -1;
b411b363
PR
4752 goto fail;
4753 }
4754
4755 rv = !memcmp(response, right_response, resp_size);
4756
4757 if (rv)
44ed167d
PR
 4758 conn_info(tconn, "Peer authenticated using %d bytes of HMAC\n",
4759 resp_size);
b10d96cb
JT
4760 else
4761 rv = -1;
b411b363
PR
4762
4763 fail:
4764 kfree(peers_ch);
4765 kfree(response);
4766 kfree(right_response);
4767
4768 return rv;
4769}
4770#endif
4771
4772int drbdd_init(struct drbd_thread *thi)
4773{
392c8801 4774 struct drbd_tconn *tconn = thi->tconn;
b411b363
PR
4775 int h;
4776
4d641dd7 4777 conn_info(tconn, "receiver (re)started\n");
b411b363
PR
4778
4779 do {
81fa2e67 4780 h = conn_connect(tconn);
b411b363 4781 if (h == 0) {
81fa2e67 4782 conn_disconnect(tconn);
20ee6390 4783 schedule_timeout_interruptible(HZ);
b411b363
PR
4784 }
4785 if (h == -1) {
4d641dd7 4786 conn_warn(tconn, "Discarding network configuration.\n");
bbeb641c 4787 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363
PR
4788 }
4789 } while (h == 0);
4790
91fd4dad
PR
4791 if (h > 0)
4792 drbdd(tconn);
b411b363 4793
81fa2e67 4794 conn_disconnect(tconn);
b411b363 4795
4d641dd7 4796 conn_info(tconn, "receiver terminated\n");
b411b363
PR
4797 return 0;
4798}
4799
4800/* ********* acknowledge sender ******** */
4801
e05e1e59 4802static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
e4f78ede 4803{
e658983a 4804 struct p_req_state_reply *p = pi->data;
e4f78ede
PR
4805 int retcode = be32_to_cpu(p->retcode);
4806
4807 if (retcode >= SS_SUCCESS) {
4808 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4809 } else {
4810 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4811 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4812 drbd_set_st_err_str(retcode), retcode);
4813 }
4814 wake_up(&tconn->ping_wait);
4815
2735a594 4816 return 0;
e4f78ede
PR
4817}
4818
1952e916 4819static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4820{
1952e916 4821 struct drbd_conf *mdev;
e658983a 4822 struct p_req_state_reply *p = pi->data;
b411b363
PR
4823 int retcode = be32_to_cpu(p->retcode);
4824
1952e916
AG
4825 mdev = vnr_to_mdev(tconn, pi->vnr);
4826 if (!mdev)
2735a594 4827 return -EIO;
1952e916 4828
4d0fc3fd
PR
4829 if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) {
4830 D_ASSERT(tconn->agreed_pro_version < 100);
4831 return got_conn_RqSReply(tconn, pi);
4832 }
4833
e4f78ede
PR
4834 if (retcode >= SS_SUCCESS) {
4835 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4836 } else {
4837 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4838 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4839 drbd_set_st_err_str(retcode), retcode);
b411b363 4840 }
e4f78ede
PR
4841 wake_up(&mdev->state_wait);
4842
2735a594 4843 return 0;
b411b363
PR
4844}
4845
e05e1e59 4846static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4847{
2735a594 4848 return drbd_send_ping_ack(tconn);
b411b363
PR
4849
4850}
4851
e05e1e59 4852static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363
PR
4853{
4854 /* restore idle timeout */
2a67d8b9
PR
4855 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4856 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4857 wake_up(&tconn->ping_wait);
b411b363 4858
2735a594 4859 return 0;
b411b363
PR
4860}
4861
1952e916 4862static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4863{
1952e916 4864 struct drbd_conf *mdev;
e658983a 4865 struct p_block_ack *p = pi->data;
b411b363
PR
4866 sector_t sector = be64_to_cpu(p->sector);
4867 int blksize = be32_to_cpu(p->blksize);
4868
1952e916
AG
4869 mdev = vnr_to_mdev(tconn, pi->vnr);
4870 if (!mdev)
2735a594 4871 return -EIO;
1952e916 4872
31890f4a 4873 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
b411b363
PR
4874
4875 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4876
1d53f09e
LE
4877 if (get_ldev(mdev)) {
4878 drbd_rs_complete_io(mdev, sector);
4879 drbd_set_in_sync(mdev, sector, blksize);
4880 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4881 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4882 put_ldev(mdev);
4883 }
b411b363 4884 dec_rs_pending(mdev);
778f271d 4885 atomic_add(blksize >> 9, &mdev->rs_sect_in);
b411b363 4886
2735a594 4887 return 0;
b411b363
PR
4888}
4889
bc9c5c41
AG
4890static int
4891validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4892 struct rb_root *root, const char *func,
4893 enum drbd_req_event what, bool missing_ok)
b411b363
PR
4894{
4895 struct drbd_request *req;
4896 struct bio_and_error m;
4897
87eeee41 4898 spin_lock_irq(&mdev->tconn->req_lock);
bc9c5c41 4899 req = find_request(mdev, root, id, sector, missing_ok, func);
b411b363 4900 if (unlikely(!req)) {
87eeee41 4901 spin_unlock_irq(&mdev->tconn->req_lock);
85997675 4902 return -EIO;
b411b363
PR
4903 }
4904 __req_mod(req, what, &m);
87eeee41 4905 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
4906
4907 if (m.bio)
4908 complete_master_bio(mdev, &m);
85997675 4909 return 0;
b411b363
PR
4910}
4911
1952e916 4912static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4913{
1952e916 4914 struct drbd_conf *mdev;
e658983a 4915 struct p_block_ack *p = pi->data;
b411b363
PR
4916 sector_t sector = be64_to_cpu(p->sector);
4917 int blksize = be32_to_cpu(p->blksize);
4918 enum drbd_req_event what;
4919
1952e916
AG
4920 mdev = vnr_to_mdev(tconn, pi->vnr);
4921 if (!mdev)
2735a594 4922 return -EIO;
1952e916 4923
b411b363
PR
4924 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4925
579b57ed 4926 if (p->block_id == ID_SYNCER) {
b411b363
PR
4927 drbd_set_in_sync(mdev, sector, blksize);
4928 dec_rs_pending(mdev);
2735a594 4929 return 0;
b411b363 4930 }
e05e1e59 4931 switch (pi->cmd) {
b411b363 4932 case P_RS_WRITE_ACK:
8554df1c 4933 what = WRITE_ACKED_BY_PEER_AND_SIS;
b411b363
PR
4934 break;
4935 case P_WRITE_ACK:
8554df1c 4936 what = WRITE_ACKED_BY_PEER;
b411b363
PR
4937 break;
4938 case P_RECV_ACK:
8554df1c 4939 what = RECV_ACKED_BY_PEER;
b411b363 4940 break;
7be8da07 4941 case P_DISCARD_WRITE:
7be8da07
AG
4942 what = DISCARD_WRITE;
4943 break;
4944 case P_RETRY_WRITE:
7be8da07 4945 what = POSTPONE_WRITE;
b411b363
PR
4946 break;
4947 default:
2735a594 4948 BUG();
b411b363
PR
4949 }
4950
2735a594
AG
4951 return validate_req_change_req_state(mdev, p->block_id, sector,
4952 &mdev->write_requests, __func__,
4953 what, false);
b411b363
PR
4954}
4955
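/* Annotation: negative acknowledgment, the peer could not satisfy the write.
 * Resync requests only get their failure accounted; application writes are
 * marked NEG_ACKED, and if the request is no longer tracked (already
 * completed towards the upper layers), the range is marked out of sync. */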
1952e916 4956static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4957{
1952e916 4958 struct drbd_conf *mdev;
e658983a 4959 struct p_block_ack *p = pi->data;
b411b363 4960 sector_t sector = be64_to_cpu(p->sector);
2deb8336 4961 int size = be32_to_cpu(p->blksize);
85997675 4962 int err;
b411b363 4963
1952e916
AG
4964 mdev = vnr_to_mdev(tconn, pi->vnr);
4965 if (!mdev)
2735a594 4966 return -EIO;
1952e916 4967
b411b363
PR
4968 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4969
579b57ed 4970 if (p->block_id == ID_SYNCER) {
b411b363
PR
4971 dec_rs_pending(mdev);
4972 drbd_rs_failed_io(mdev, sector, size);
2735a594 4973 return 0;
b411b363 4974 }
2deb8336 4975
85997675
AG
4976 err = validate_req_change_req_state(mdev, p->block_id, sector,
4977 &mdev->write_requests, __func__,
303d1448 4978 NEG_ACKED, true);
85997675 4979 if (err) {
c3afd8f5
AG
4980 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4981 The master bio might already be completed, therefore the
4982 request is no longer in the collision hash. */
4983 /* In Protocol B we might already have got a P_RECV_ACK
4984 but then get a P_NEG_ACK afterwards. */
c3afd8f5 4985 drbd_set_out_of_sync(mdev, sector, size);
2deb8336 4986 }
2735a594 4987 return 0;
b411b363
PR
4988}
4989
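/* Annotation: the peer could not serve a read request.  Log it and fail the
 * corresponding entry in the read request tree. */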
1952e916 4990static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 4991{
1952e916 4992 struct drbd_conf *mdev;
e658983a 4993 struct p_block_ack *p = pi->data;
b411b363
PR
4994 sector_t sector = be64_to_cpu(p->sector);
4995
1952e916
AG
4996 mdev = vnr_to_mdev(tconn, pi->vnr);
4997 if (!mdev)
2735a594 4998 return -EIO;
1952e916 4999
b411b363 5000 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
7be8da07 5001
380207d0 5002 dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
b411b363
PR
5003 (unsigned long long)sector, be32_to_cpu(p->blksize));
5004
2735a594
AG
5005 return validate_req_change_req_state(mdev, p->block_id, sector,
5006 &mdev->read_requests, __func__,
5007 NEG_ACKED, false);
b411b363
PR
5008}
5009
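/* Annotation: negative reply to a resync read request, or a cancelled one
 * (P_RS_CANCEL).  Complete the resync io; only a real failure is accounted
 * via drbd_rs_failed_io(). */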
1952e916 5010static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 5011{
1952e916 5012 struct drbd_conf *mdev;
b411b363
PR
5013 sector_t sector;
5014 int size;
e658983a 5015 struct p_block_ack *p = pi->data;
1952e916
AG
5016
5017 mdev = vnr_to_mdev(tconn, pi->vnr);
5018 if (!mdev)
2735a594 5019 return -EIO;
b411b363
PR
5020
5021 sector = be64_to_cpu(p->sector);
5022 size = be32_to_cpu(p->blksize);
b411b363
PR
5023
5024 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5025
5026 dec_rs_pending(mdev);
5027
5028 if (get_ldev_if_state(mdev, D_FAILED)) {
5029 drbd_rs_complete_io(mdev, sector);
e05e1e59 5030 switch (pi->cmd) {
d612d309
PR
5031 case P_NEG_RS_DREPLY:
5032 drbd_rs_failed_io(mdev, sector, size);
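			/* fall through - P_RS_CANCEL needs no failure accounting */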
5033 case P_RS_CANCEL:
5034 break;
5035 default:
2735a594 5036 BUG();
d612d309 5037 }
b411b363
PR
5038 put_ldev(mdev);
5039 }
5040
2735a594 5041 return 0;
b411b363
PR
5042}
5043
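/* Annotation: a barrier ack from the peer.  Release the corresponding
 * transfer log epoch and, for volumes that sit in C_AHEAD with no
 * application I/O in flight, arm the timer that starts the resync back
 * towards sync source. */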
1952e916 5044static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 5045{
e658983a 5046 struct p_barrier_ack *p = pi->data;
9ed57dcb
LE
5047 struct drbd_conf *mdev;
5048 int vnr;
1952e916 5049
9ed57dcb 5050 tl_release(tconn, p->barrier, be32_to_cpu(p->set_size));
b411b363 5051
9ed57dcb
LE
5052 rcu_read_lock();
5053 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5054 if (mdev->state.conn == C_AHEAD &&
5055 atomic_read(&mdev->ap_in_flight) == 0 &&
5056 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
5057 mdev->start_resync_timer.expires = jiffies + HZ;
5058 add_timer(&mdev->start_resync_timer);
5059 }
c4752ef1 5060 }
9ed57dcb 5061 rcu_read_unlock();
c4752ef1 5062
2735a594 5063 return 0;
b411b363
PR
5064}
5065
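/* Annotation: result of one online verify request.  Record out-of-sync
 * blocks, update the progress marks, and once the last block has been
 * verified queue the work item that finishes the verify run. */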
1952e916 5066static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
b411b363 5067{
1952e916 5068 struct drbd_conf *mdev;
e658983a 5069 struct p_block_ack *p = pi->data;
b411b363
PR
5070 struct drbd_work *w;
5071 sector_t sector;
5072 int size;
5073
1952e916
AG
5074 mdev = vnr_to_mdev(tconn, pi->vnr);
5075 if (!mdev)
2735a594 5076 return -EIO;
1952e916 5077
b411b363
PR
5078 sector = be64_to_cpu(p->sector);
5079 size = be32_to_cpu(p->blksize);
5080
5081 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5082
5083 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
8f7bed77 5084 drbd_ov_out_of_sync_found(mdev, sector, size);
b411b363 5085 else
8f7bed77 5086 ov_out_of_sync_print(mdev);
b411b363 5087
1d53f09e 5088 if (!get_ldev(mdev))
2735a594 5089 return 0;
1d53f09e 5090
b411b363
PR
5091 drbd_rs_complete_io(mdev, sector);
5092 dec_rs_pending(mdev);
5093
ea5442af
LE
5094 --mdev->ov_left;
5095
5096 /* let's advance progress step marks only for every other megabyte */
5097 if ((mdev->ov_left & 0x200) == 0x200)
5098 drbd_advance_rs_marks(mdev, mdev->ov_left);
5099
5100 if (mdev->ov_left == 0) {
b411b363
PR
5101 w = kmalloc(sizeof(*w), GFP_NOIO);
5102 if (w) {
5103 w->cb = w_ov_finished;
a21e9298 5104 w->mdev = mdev;
e42325a5 5105 drbd_queue_work_front(&mdev->tconn->data.work, w);
b411b363
PR
5106 } else {
5107 dev_err(DEV, "kmalloc(w) failed.");
8f7bed77 5108 ov_out_of_sync_print(mdev);
b411b363
PR
5109 drbd_resync_finished(mdev);
5110 }
5111 }
1d53f09e 5112 put_ldev(mdev);
2735a594 5113 return 0;
b411b363
PR
5114}
5115
1952e916 5116static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
0ced55a3 5117{
2735a594 5118 return 0;
0ced55a3
PR
5119}
5120
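/* Annotation: drain the done_ee lists of all volumes on this connection.
 * Loops until no volume has completed peer requests left; returns non-zero
 * if flushing a volume failed. */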
a990be46 5121static int tconn_finish_peer_reqs(struct drbd_tconn *tconn)
32862ec7 5122{
082a3439 5123 struct drbd_conf *mdev;
c141ebda 5124 int vnr, not_empty = 0;
32862ec7
PR
5125
5126 do {
5127 clear_bit(SIGNAL_ASENDER, &tconn->flags);
5128 flush_signals(current);
c141ebda
PR
5129
5130 rcu_read_lock();
5131 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5132 kref_get(&mdev->kref);
5133 rcu_read_unlock();
d3fcb490 5134 if (drbd_finish_peer_reqs(mdev)) {
c141ebda
PR
5135 kref_put(&mdev->kref, &drbd_minor_destroy);
5136 return 1;
d3fcb490 5137 }
c141ebda
PR
5138 kref_put(&mdev->kref, &drbd_minor_destroy);
5139 rcu_read_lock();
082a3439 5140 }
32862ec7 5141 set_bit(SIGNAL_ASENDER, &tconn->flags);
082a3439
PR
5142
5143 spin_lock_irq(&tconn->req_lock);
c141ebda 5144 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
082a3439
PR
5145 not_empty = !list_empty(&mdev->done_ee);
5146 if (not_empty)
5147 break;
5148 }
5149 spin_unlock_irq(&tconn->req_lock);
c141ebda 5150 rcu_read_unlock();
32862ec7
PR
5151 } while (not_empty);
5152
5153 return 0;
5154}
5155
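/* Annotation: dispatch table for packets received on the meta socket.
 * pkt_size is the expected payload size on top of the header; fn returns 0
 * on success, non-zero to trigger a reconnect. */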
7201b972
AG
5156struct asender_cmd {
5157 size_t pkt_size;
1952e916 5158 int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
7201b972
AG
5159};
5160
5161static struct asender_cmd asender_tbl[] = {
e658983a
AG
5162 [P_PING] = { 0, got_Ping },
5163 [P_PING_ACK] = { 0, got_PingAck },
1952e916
AG
5164 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5165 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5166 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5167 [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
5168 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5169 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
5170 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
5171 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5172 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5173 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5174 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
5175 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
5176 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5177 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5178 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
7201b972
AG
5179};
5180
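/* Annotation: main loop of the asender thread.  Send pings and pending acks,
 * then receive and dispatch meta socket packets via asender_tbl.  Errors on
 * the meta socket lead to C_NETWORK_FAILURE, an unknown packet to
 * C_DISCONNECTING. */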
b411b363
PR
5181int drbd_asender(struct drbd_thread *thi)
5182{
392c8801 5183 struct drbd_tconn *tconn = thi->tconn;
b411b363 5184 struct asender_cmd *cmd = NULL;
77351055 5185 struct packet_info pi;
257d0af6 5186 int rv;
e658983a 5187 void *buf = tconn->meta.rbuf;
b411b363 5188 int received = 0;
52b061a4
AG
5189 unsigned int header_size = drbd_header_size(tconn);
5190 int expect = header_size;
44ed167d
PR
5191 bool ping_timeout_active = false;
5192 struct net_conf *nc;
bb77d34e 5193 int ping_timeo, tcp_cork, ping_int;
b411b363 5194
b411b363
PR
5195 current->policy = SCHED_RR; /* Make this a realtime task! */
5196 current->rt_priority = 2; /* more important than all other tasks */
5197
e77a0a5c 5198 while (get_t_state(thi) == RUNNING) {
80822284 5199 drbd_thread_current_set_cpu(thi);
44ed167d
PR
5200
5201 rcu_read_lock();
5202 nc = rcu_dereference(tconn->net_conf);
5203 ping_timeo = nc->ping_timeo;
bb77d34e 5204 tcp_cork = nc->tcp_cork;
44ed167d
PR
5205 ping_int = nc->ping_int;
5206 rcu_read_unlock();
5207
32862ec7 5208 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
a17647aa 5209 if (drbd_send_ping(tconn)) {
32862ec7 5210 conn_err(tconn, "drbd_send_ping has failed\n");
841ce241
AG
5211 goto reconnect;
5212 }
44ed167d
PR
5213 tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
5214 ping_timeout_active = true;
b411b363
PR
5215 }
5216
32862ec7
PR
5217 /* TODO: conditionally cork; it may hurt latency if we cork without
5218 much to send */
bb77d34e 5219 if (tcp_cork)
32862ec7 5220 drbd_tcp_cork(tconn->meta.socket);
a990be46
AG
5221 if (tconn_finish_peer_reqs(tconn)) {
5222 conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
32862ec7 5223 goto reconnect;
082a3439 5224 }
b411b363 5225 /* but unconditionally uncork unless disabled */
bb77d34e 5226 if (tcp_cork)
32862ec7 5227 drbd_tcp_uncork(tconn->meta.socket);
b411b363
PR
5228
5229 /* short circuit, recv_msg would return EINTR anyways. */
5230 if (signal_pending(current))
5231 continue;
5232
32862ec7
PR
5233 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
5234 clear_bit(SIGNAL_ASENDER, &tconn->flags);
b411b363
PR
5235
5236 flush_signals(current);
5237
5238 /* Note:
5239 * -EINTR (on meta) we got a signal
5240 * -EAGAIN (on meta) rcvtimeo expired
5241 * -ECONNRESET other side closed the connection
5242 * -ERESTARTSYS (on data) we got a signal
5243 * rv < 0 other than above: unexpected error!
5244 * rv == expected: full header or command
5245 * rv < expected: "woken" by signal during receive
5246 * rv == 0 : "connection shut down by peer"
5247 */
5248 if (likely(rv > 0)) {
5249 received += rv;
5250 buf += rv;
5251 } else if (rv == 0) {
32862ec7 5252 conn_err(tconn, "meta connection shut down by peer.\n");
b411b363
PR
5253 goto reconnect;
5254 } else if (rv == -EAGAIN) {
cb6518cb
LE
5255 /* If the data socket received something meanwhile,
5256 * that is good enough: peer is still alive. */
32862ec7
PR
5257 if (time_after(tconn->last_received,
5258 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
cb6518cb 5259 continue;
f36af18c 5260 if (ping_timeout_active) {
32862ec7 5261 conn_err(tconn, "PingAck did not arrive in time.\n");
b411b363
PR
5262 goto reconnect;
5263 }
32862ec7 5264 set_bit(SEND_PING, &tconn->flags);
b411b363
PR
5265 continue;
5266 } else if (rv == -EINTR) {
5267 continue;
5268 } else {
32862ec7 5269 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
b411b363
PR
5270 goto reconnect;
5271 }
5272
5273 if (received == expect && cmd == NULL) {
e658983a 5274 if (decode_header(tconn, tconn->meta.rbuf, &pi))
b411b363 5275 goto reconnect;
7201b972 5276 cmd = &asender_tbl[pi.cmd];
1952e916 5277 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
2fcb8f30
AG
5278 conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n",
5279 cmdname(pi.cmd), pi.cmd);
b411b363
PR
5280 goto disconnect;
5281 }
e658983a 5282 expect = header_size + cmd->pkt_size;
52b061a4 5283 if (pi.size != expect - header_size) {
32862ec7 5284 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
77351055 5285 pi.cmd, pi.size);
b411b363 5286 goto reconnect;
257d0af6 5287 }
b411b363
PR
5288 }
5289 if (received == expect) {
2735a594 5290 bool err;
a4fbda8e 5291
2735a594
AG
5292 err = cmd->fn(tconn, &pi);
5293 if (err) {
1952e916 5294 conn_err(tconn, "%pf failed\n", cmd->fn);
b411b363 5295 goto reconnect;
1952e916 5296 }
b411b363 5297
a4fbda8e
PR
5298 tconn->last_received = jiffies;
5299
44ed167d
PR
5300 if (cmd == &asender_tbl[P_PING_ACK]) {
5301 /* restore idle timeout */
5302 tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
5303 ping_timeout_active = false;
5304 }
f36af18c 5305
e658983a 5306 buf = tconn->meta.rbuf;
b411b363 5307 received = 0;
52b061a4 5308 expect = header_size;
b411b363
PR
5309 cmd = NULL;
5310 }
5311 }
5312
5313 if (0) {
5314reconnect:
bbeb641c 5315 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
b411b363
PR
5316 }
5317 if (0) {
5318disconnect:
bbeb641c 5319 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
b411b363 5320 }
32862ec7 5321 clear_bit(SIGNAL_ASENDER, &tconn->flags);
b411b363 5322
32862ec7 5323 conn_info(tconn, "asender terminated\n");
b411b363
PR
5324
5325 return 0;
5326}