diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7e2b2fa189c340e7f0968aead6f878879e8a9a72..bf168838a0296e9387de33ad5afabb3bdf3b35f0 100644
@@ -124,7 +124,7 @@ static struct ctl_table xs_tunables_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &xprt_min_resvport_limit,
-               .extra2         = &xprt_max_resvport_limit
+               .extra2         = &xprt_max_resvport
        },
        {
                .procname       = "max_resvport",
@@ -132,7 +132,7 @@ static struct ctl_table xs_tunables_table[] = {
                .maxlen         = sizeof(unsigned int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &xprt_min_resvport_limit,
+               .extra1         = &xprt_min_resvport,
                .extra2         = &xprt_max_resvport_limit
        },
        {
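
A note on the two hunks above: min_resvport's upper bound now tracks the
live xprt_max_resvport value, and max_resvport's lower bound tracks
xprt_min_resvport, so proc_dointvec_minmax() rejects any write that would
leave min_resvport > max_resvport. A minimal userspace sketch of the effect
(write_sysctl() is an illustrative helper, not kernel API; needs root, and
assumes max_resvport still holds its default of 1023):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_sysctl(const char *path, const char *val)
{
        int fd = open(path, O_WRONLY);

        if (fd < 0)
                return -errno;
        if (write(fd, val, strlen(val)) < 0) {
                int err = -errno;

                close(fd);
                return err;
        }
        close(fd);
        return 0;
}

int main(void)
{
        /* 2000 exceeds the current max_resvport, so the cross-clamped
         * bounds above make this fail with EINVAL instead of silently
         * inverting the range.
         */
        int err = write_sysctl("/proc/sys/sunrpc/min_resvport", "2000");

        printf("min_resvport=2000: %s\n", err ? strerror(-err) : "accepted");
        return 0;
}
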
@@ -177,7 +177,6 @@ static struct ctl_table sunrpc_table[] = {
  * increase over time if the server is down or not responding.
  */
 #define XS_TCP_INIT_REEST_TO   (3U * HZ)
-#define XS_TCP_MAX_REEST_TO    (5U * 60 * HZ)
 
 /*
  * TCP idle timeout; client drops the transport socket if it is idle
@@ -642,6 +641,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct xdr_buf *xdr = &req->rq_snd_buf;
        bool zerocopy = true;
+       bool vm_wait = false;
        int status;
        int sent;
 
@@ -677,15 +677,33 @@ static int xs_tcp_send_request(struct rpc_task *task)
                        return 0;
                }
 
+               WARN_ON_ONCE(sent == 0 && status == 0);
+
+               if (status == -EAGAIN) {
+                       /*
+                        * Return EAGAIN if we're sure we're hitting the
+                        * socket send buffer limits.
+                        */
+                       if (test_bit(SOCK_NOSPACE, &transport->sock->flags))
+                               break;
+                       /*
+                        * Did we hit a memory allocation failure?
+                        */
+                       if (sent == 0) {
+                               status = -ENOBUFS;
+                               if (vm_wait)
+                                       break;
+                               /* Retry, knowing now that we're below the
+                                * socket send buffer limit
+                                */
+                               vm_wait = true;
+                       }
+                       continue;
+               }
                if (status < 0)
                        break;
-               if (sent == 0) {
-                       status = -EAGAIN;
-                       break;
-               }
+               vm_wait = false;
        }
-       if (status == -EAGAIN && sk_stream_is_writeable(transport->inet))
-               status = -ENOBUFS;
 
        switch (status) {
        case -ENOTSOCK:
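
The rewritten loop distinguishes two causes of a short send: a genuinely
full send buffer (SOCK_NOSPACE set, so -EAGAIN is correct and the caller
waits for write space) and a transient memory-allocation failure (nothing
sent, buffer not full), which is retried exactly once with vm_wait set
before giving up with -ENOBUFS. A stand-alone restatement of that policy;
short_send_verdict() is a hypothetical helper, the real logic stays inline
in xs_tcp_send_request():

#include <errno.h>
#include <stdbool.h>

enum send_verdict { SEND_STOP, SEND_AGAIN };

static enum send_verdict short_send_verdict(int sent, int *status,
                                            bool sock_nospace, bool *vm_wait)
{
        if (*status != -EAGAIN)
                return SEND_STOP;       /* hard error: caller breaks out */
        if (sock_nospace)
                return SEND_STOP;       /* buffer truly full: keep -EAGAIN */
        if (sent == 0) {                /* memory allocation failure */
                *status = -ENOBUFS;
                if (*vm_wait)
                        return SEND_STOP;       /* second failure: give up */
                *vm_wait = true;                /* budget one retry */
        }
        return SEND_AGAIN;
}

Note the `vm_wait = false` at the bottom of the loop: any iteration that
makes progress restores the one-retry budget.
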
@@ -755,11 +773,19 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
        sk->sk_error_report = transport->old_error_report;
 }
 
+static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
+{
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+       clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+}
+
 static void xs_sock_reset_connection_flags(struct rpc_xprt *xprt)
 {
        smp_mb__before_atomic();
        clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
        clear_bit(XPRT_CLOSING, &xprt->state);
+       xs_sock_reset_state_flags(xprt);
        smp_mb__after_atomic();
 }
 
@@ -962,10 +988,13 @@ static void xs_local_data_receive(struct sock_xprt *transport)
                goto out;
        for (;;) {
                skb = skb_recv_datagram(sk, 0, 1, &err);
-               if (skb == NULL)
+               if (skb != NULL) {
+                       xs_local_data_read_skb(&transport->xprt, sk, skb);
+                       skb_free_datagram(sk, skb);
+                       continue;
+               }
+               if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
                        break;
-               xs_local_data_read_skb(&transport->xprt, sk, skb);
-               skb_free_datagram(sk, skb);
        }
 out:
        mutex_unlock(&transport->recv_mutex);
@@ -1043,10 +1072,13 @@ static void xs_udp_data_receive(struct sock_xprt *transport)
                goto out;
        for (;;) {
                skb = skb_recv_datagram(sk, 0, 1, &err);
-               if (skb == NULL)
+               if (skb != NULL) {
+                       xs_udp_data_read_skb(&transport->xprt, sk, skb);
+                       skb_free_datagram_locked(sk, skb);
+                       continue;
+               }
+               if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
                        break;
-               xs_udp_data_read_skb(&transport->xprt, sk, skb);
-               skb_free_datagram(sk, skb);
        }
 out:
        mutex_unlock(&transport->recv_mutex);
@@ -1074,7 +1106,14 @@ static void xs_data_ready(struct sock *sk)
        if (xprt != NULL) {
                struct sock_xprt *transport = container_of(xprt,
                                struct sock_xprt, xprt);
-               queue_work(rpciod_workqueue, &transport->recv_worker);
+               transport->old_data_ready(sk);
+               /* Any data means we had a useful conversation, so
+                * we don't need to delay the next reconnect
+                */
+               if (xprt->reestablish_timeout)
+                       xprt->reestablish_timeout = 0;
+               if (!test_and_set_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+                       queue_work(xprtiod_workqueue, &transport->recv_worker);
        }
        read_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1474,10 +1513,15 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
        for (;;) {
                lock_sock(sk);
                read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
-               release_sock(sk);
-               if (read <= 0)
-                       break;
-               total += read;
+               if (read <= 0) {
+                       clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+                       release_sock(sk);
+                       if (!test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+                               break;
+               } else {
+                       release_sock(sk);
+                       total += read;
+               }
                rd_desc.count = 65536;
        }
 out:
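
Together with xs_data_ready() above, these receive-loop changes form a
flag handshake on XPRT_SOCK_DATA_READY: test_and_set_bit() in the callback
queues the worker at most once per burst of wakeups, and each loop, on
seeing an empty queue, consumes the bit and only stops if it was already
clear, so a wakeup racing with the final read is never lost. (In the TCP
loop the bit is cleared while the socket lock is still held, so a new
skb's callback can only re-set it after the clear.) A compressed userspace
model of the handshake using C11 atomics; all names here are illustrative
stand-ins, not kernel API:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool data_ready;          /* models XPRT_SOCK_DATA_READY */

static void queue_recv_worker(void) { } /* models queue_work() */
static bool receive_one(void) { return false; } /* models skb_recv_datagram() */

/* Producer: the sk->sk_data_ready callback. */
static void on_data_ready(void)
{
        if (!atomic_exchange(&data_ready, true))
                queue_recv_worker();    /* only the 0 -> 1 transition queues */
}

/* Consumer: the receive worker. */
static void recv_worker(void)
{
        for (;;) {
                while (receive_one())
                        ;               /* drain everything visible */
                /* Queue looked empty: consume the flag.  If it was set, a
                 * wakeup raced with the final read, so drain again; if it
                 * was already clear, stopping is safe.
                 */
                if (!atomic_exchange(&data_ready, false))
                        break;
        }
}
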
@@ -1492,34 +1536,6 @@ static void xs_tcp_data_receive_workfn(struct work_struct *work)
        xs_tcp_data_receive(transport);
 }
 
-/**
- * xs_tcp_data_ready - "data ready" callback for TCP sockets
- * @sk: socket with data to read
- *
- */
-static void xs_tcp_data_ready(struct sock *sk)
-{
-       struct sock_xprt *transport;
-       struct rpc_xprt *xprt;
-
-       dprintk("RPC:       xs_tcp_data_ready...\n");
-
-       read_lock_bh(&sk->sk_callback_lock);
-       if (!(xprt = xprt_from_sock(sk)))
-               goto out;
-       transport = container_of(xprt, struct sock_xprt, xprt);
-
-       /* Any data means we had a useful conversation, so
-        * the we don't need to delay the next reconnect
-        */
-       if (xprt->reestablish_timeout)
-               xprt->reestablish_timeout = 0;
-       queue_work(rpciod_workqueue, &transport->recv_worker);
-
-out:
-       read_unlock_bh(&sk->sk_callback_lock);
-}
-
 /**
  * xs_tcp_state_change - callback to handle TCP socket state changes
  * @sk: socket whose state has changed
@@ -1714,7 +1730,7 @@ static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
 
 static unsigned short xs_get_random_port(void)
 {
-       unsigned short range = xprt_max_resvport - xprt_min_resvport;
+       unsigned short range = xprt_max_resvport - xprt_min_resvport + 1;
        unsigned short rand = (unsigned short) prandom_u32() % range;
        return rand + xprt_min_resvport;
 }
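
The "+ 1" is an off-by-one fix: with the default limits of 665 and 1023,
the old range of max - min = 358 meant rand % range + min could never
produce 1023, so the top reserved port was simply never chosen (the change
also makes min == max well defined, where the old code computed a modulo
by zero). A quick userspace check, with rand() standing in for
prandom_u32():

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        const unsigned short min = 665, max = 1023;     /* default limits */
        const unsigned short range = max - min + 1;     /* 359: both ends */
        int hit_max = 0;

        for (int i = 0; i < 1000000; i++) {
                unsigned short port = rand() % range + min;

                assert(port >= min && port <= max);
                hit_max += (port == max);
        }
        printf("port %u drawn %d times\n", max, hit_max);       /* now > 0 */
        return 0;
}
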
@@ -2156,6 +2172,8 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                write_unlock_bh(&sk->sk_callback_lock);
        }
        xs_udp_do_set_buffer_size(xprt);
+
+       xprt->stat.connect_start = jiffies;
 }
 
 static void xs_udp_setup_socket(struct work_struct *work)
@@ -2219,6 +2237,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                unsigned int keepcnt = xprt->timeout->to_retries + 1;
                unsigned int opt_on = 1;
                unsigned int timeo;
+               unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
 
                /* TCP Keepalive options */
                kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
@@ -2230,6 +2249,16 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
                                (char *)&keepcnt, sizeof(keepcnt));
 
+               /* Avoid temporary addresses, they are bad for long-lived
+                * connections such as NFS mounts.
+                * RFC4941, section 3.6 suggests that:
+                *    Individual applications, which have specific
+                *    knowledge about the normal duration of connections,
+                *    MAY override this as appropriate.
+                */
+               kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
+                               (char *)&addr_pref, sizeof(addr_pref));
+
                /* TCP user timeout (see RFC5482) */
                timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
                        (xprt->timeout->to_retries + 1);
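
IPV6_ADDR_PREFERENCES and IPV6_PREFER_SRC_PUBLIC are the RFC 5014
source-address-selection knobs; the hunk keeps RFC 4941 temporary
(privacy) addresses, which can be deprecated mid-connection, from being
picked for a long-lived mount. For reference, the same request from
userspace; the fallback #defines carry the values from linux/in6.h in
case older libc headers lack them:

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

#ifndef IPV6_ADDR_PREFERENCES
#define IPV6_ADDR_PREFERENCES   72      /* values from linux/in6.h */
#define IPV6_PREFER_SRC_PUBLIC  0x0002
#endif

int main(void)
{
        int pref = IPV6_PREFER_SRC_PUBLIC;
        int fd = socket(AF_INET6, SOCK_STREAM, 0);

        if (fd < 0 || setsockopt(fd, IPPROTO_IPV6, IPV6_ADDR_PREFERENCES,
                                 &pref, sizeof(pref)) < 0) {
                perror("IPV6_ADDR_PREFERENCES");
                return 1;
        }
        printf("preferring public over temporary source addresses\n");
        return 0;
}
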
@@ -2241,7 +2270,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                xs_save_old_callbacks(transport, sk);
 
                sk->sk_user_data = xprt;
-               sk->sk_data_ready = xs_tcp_data_ready;
+               sk->sk_data_ready = xs_data_ready;
                sk->sk_state_change = xs_tcp_state_change;
                sk->sk_write_space = xs_tcp_write_space;
                sock_set_flag(sk, SOCK_FASYNC);
@@ -2278,6 +2307,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
                /* SYN_SENT! */
                if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
                        xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+               break;
+       case -EADDRNOTAVAIL:
+               /* Source port number is unavailable. Try a new one! */
+               transport->srcport = 0;
        }
 out:
        return ret;
@@ -2352,6 +2385,25 @@ out:
        xprt_wake_pending_tasks(xprt, status);
 }
 
+static unsigned long xs_reconnect_delay(const struct rpc_xprt *xprt)
+{
+       unsigned long start, now = jiffies;
+
+       start = xprt->stat.connect_start + xprt->reestablish_timeout;
+       if (time_after(start, now))
+               return start - now;
+       return 0;
+}
+
+static void xs_reconnect_backoff(struct rpc_xprt *xprt)
+{
+       xprt->reestablish_timeout <<= 1;
+       if (xprt->reestablish_timeout > xprt->max_reconnect_timeout)
+               xprt->reestablish_timeout = xprt->max_reconnect_timeout;
+       if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+               xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+}
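
These two helpers, used by xs_connect() below, replace the fixed
five-minute XS_TCP_MAX_REEST_TO cap removed in an earlier hunk: the wait
is measured from stat.connect_start, so time already spent connected
counts against it, and the doubling now saturates at the per-transport
max_reconnect_timeout (initialised from the transport's to_maxval in a
later hunk). The schedule this produces, sketched in plain seconds with
HZ factored out and an illustrative 180-second cap:

#include <stdio.h>

#define INIT_REEST_TO   3UL     /* XS_TCP_INIT_REEST_TO / HZ */

int main(void)
{
        const unsigned long max_reconnect = 180;        /* illustrative */
        unsigned long reest = INIT_REEST_TO;

        for (int attempt = 1; attempt <= 8; attempt++) {
                /* prints 3 6 12 24 48 96 180 180 */
                printf("attempt %d: back off up to %lus\n", attempt, reest);
                reest <<= 1;                    /* xs_reconnect_backoff() */
                if (reest > max_reconnect)
                        reest = max_reconnect;
                if (reest < INIT_REEST_TO)
                        reest = INIT_REEST_TO;
        }
        return 0;
}
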
+
 /**
  * xs_connect - connect a socket to a remote endpoint
  * @xprt: pointer to transport structure
@@ -2369,6 +2421,7 @@ out:
 static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
 {
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       unsigned long delay = 0;
 
        WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport));
 
@@ -2380,19 +2433,15 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task)
                /* Start by resetting any existing state */
                xs_reset_transport(transport);
 
-               queue_delayed_work(rpciod_workqueue,
-                                  &transport->connect_worker,
-                                  xprt->reestablish_timeout);
-               xprt->reestablish_timeout <<= 1;
-               if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
-                       xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
-               if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
-                       xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
-       } else {
+               delay = xs_reconnect_delay(xprt);
+               xs_reconnect_backoff(xprt);
+
+       } else
                dprintk("RPC:       xs_connect scheduled xprt %p\n", xprt);
-               queue_delayed_work(rpciod_workqueue,
-                                  &transport->connect_worker, 0);
-       }
+
+       queue_delayed_work(xprtiod_workqueue,
+                       &transport->connect_worker,
+                       delay);
 }
 
 /**
@@ -2944,6 +2993,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
        xprt->ops = &xs_tcp_ops;
        xprt->timeout = &xs_tcp_default_timeout;
 
+       xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+
        INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
        INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
 
@@ -3153,8 +3204,12 @@ static int param_set_uint_minmax(const char *val,
 
 static int param_set_portnr(const char *val, const struct kernel_param *kp)
 {
-       return param_set_uint_minmax(val, kp,
+       if (kp->arg == &xprt_min_resvport)
+               return param_set_uint_minmax(val, kp,
                        RPC_MIN_RESVPORT,
+                       xprt_max_resvport);
+       return param_set_uint_minmax(val, kp,
+                       xprt_min_resvport,
                        RPC_MAX_RESVPORT);
 }
 