Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[deliverable/linux.git] / net / ipv4 / tcp.c
index 5f64193418216393448ec9d8e839f83e85636716..f32c02e2a54346cf4e120d39e17d4d1b5e966189 100644 (file)
@@ -486,8 +486,9 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;
 
-       /* Connected? */
-       if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
+       /* Connected or passive Fast Open socket? */
+       if (sk->sk_state != TCP_SYN_SENT &&
+           (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) {
                int target = sock_rcvlowat(sk, 0, INT_MAX);
 
                if (tp->urg_seq == tp->copied_seq &&
@@ -840,10 +841,15 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
        ssize_t copied;
        long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
-       /* Wait for a connection to finish. */
-       if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+       /* Wait for a connection to finish. One exception is TCP Fast Open
+        * (passive side) where data is allowed to be sent before a connection
+        * is fully established.
+        */
+       if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
+           !tcp_passive_fastopen(sk)) {
                if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_err;
+       }
 
        clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
@@ -1042,10 +1048,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
        timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
-       /* Wait for a connection to finish. */
-       if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
+       /* Wait for a connection to finish. One exception is TCP Fast Open
+        * (passive side) where data is allowed to be sent before a connection
+        * is fully established.
+        */
+       if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
+           !tcp_passive_fastopen(sk)) {
                if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto do_error;
+       }
 
        if (unlikely(tp->repair)) {
                if (tp->repair_queue == TCP_RECV_QUEUE) {
@@ -1139,78 +1150,43 @@ new_segment:
                                if (err)
                                        goto do_fault;
                        } else {
-                               bool merge = false;
+                               bool merge = true;
                                int i = skb_shinfo(skb)->nr_frags;
-                               struct page *page = sk->sk_sndmsg_page;
-                               int off;
-
-                               if (page && page_count(page) == 1)
-                                       sk->sk_sndmsg_off = 0;
-
-                               off = sk->sk_sndmsg_off;
-
-                               if (skb_can_coalesce(skb, i, page, off) &&
-                                   off != PAGE_SIZE) {
-                                       /* We can extend the last page
-                                        * fragment. */
-                                       merge = true;
-                               } else if (i == MAX_SKB_FRAGS || !sg) {
-                                       /* Need to add new fragment and cannot
-                                        * do this because interface is non-SG,
-                                        * or because all the page slots are
-                                        * busy. */
-                                       tcp_mark_push(tp, skb);
-                                       goto new_segment;
-                               } else if (page) {
-                                       if (off == PAGE_SIZE) {
-                                               put_page(page);
-                                               sk->sk_sndmsg_page = page = NULL;
-                                               off = 0;
+                               struct page_frag *pfrag = sk_page_frag(sk);
+
+                               if (!sk_page_frag_refill(sk, pfrag))
+                                       goto wait_for_memory;
+
+                               if (!skb_can_coalesce(skb, i, pfrag->page,
+                                                     pfrag->offset)) {
+                                       if (i == MAX_SKB_FRAGS || !sg) {
+                                               tcp_mark_push(tp, skb);
+                                               goto new_segment;
                                        }
-                               } else
-                                       off = 0;
+                                       merge = false;
+                               }
 
-                               if (copy > PAGE_SIZE - off)
-                                       copy = PAGE_SIZE - off;
+                               copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
                                if (!sk_wmem_schedule(sk, copy))
                                        goto wait_for_memory;
 
-                               if (!page) {
-                                       /* Allocate new cache page. */
-                                       if (!(page = sk_stream_alloc_page(sk)))
-                                               goto wait_for_memory;
-                               }
-
-                               /* Time to copy data. We are close to
-                                * the end! */
                                err = skb_copy_to_page_nocache(sk, from, skb,
-                                                              page, off, copy);
-                               if (err) {
-                                       /* If this page was new, give it to the
-                                        * socket so it does not get leaked.
-                                        */
-                                       if (!sk->sk_sndmsg_page) {
-                                               sk->sk_sndmsg_page = page;
-                                               sk->sk_sndmsg_off = 0;
-                                       }
+                                                              pfrag->page,
+                                                              pfrag->offset,
+                                                              copy);
+                               if (err)
                                        goto do_error;
-                               }
 
                                /* Update the skb. */
                                if (merge) {
                                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                                } else {
-                                       skb_fill_page_desc(skb, i, page, off, copy);
-                                       if (sk->sk_sndmsg_page) {
-                                               get_page(page);
-                                       } else if (off + copy < PAGE_SIZE) {
-                                               get_page(page);
-                                               sk->sk_sndmsg_page = page;
-                                       }
+                                       skb_fill_page_desc(skb, i, pfrag->page,
+                                                          pfrag->offset, copy);
+                                       get_page(pfrag->page);
                                }
-
-                               sk->sk_sndmsg_off = off + copy;
+                               pfrag->offset += copy;
                        }
 
                        if (!copied)
@@ -2150,6 +2126,10 @@ void tcp_close(struct sock *sk, long timeout)
                 * they look as CLOSING or LAST_ACK for Linux)
                 * Probably, I missed some more holelets.
                 *                                              --ANK
+                * XXX (TFO) - To start off we don't support SYN+ACK+FIN
+                * in a single packet! (May consider it later but will
+                * probably need API support or TCP_CORK SYN-ACK until
+                * data is written and socket is closed.)
                 */
                tcp_send_fin(sk);
        }
@@ -2221,8 +2201,16 @@ adjudge_to_death:
                }
        }
 
-       if (sk->sk_state == TCP_CLOSE)
+       if (sk->sk_state == TCP_CLOSE) {
+               struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+               /* We could get here with a non-NULL req if the socket is
+                * aborted (e.g., closed with unread data) before 3WHS
+                * finishes.
+                */
+               if (req != NULL)
+                       reqsk_fastopen_remove(sk, req, false);
                inet_csk_destroy_sock(sk);
+       }
        /* Otherwise, socket is reprieved until protocol close. */
 
 out:
@@ -2308,6 +2296,13 @@ int tcp_disconnect(struct sock *sk, int flags)
 }
 EXPORT_SYMBOL(tcp_disconnect);
 
+void tcp_sock_destruct(struct sock *sk)
+{
+       inet_sock_destruct(sk);
+       /* inet teardown done; now free the accept queue's fastopenq. */
+       kfree(inet_csk(sk)->icsk_accept_queue.fastopenq);
+}
+
 static inline bool tcp_can_repair_sock(const struct sock *sk)
 {
        return capable(CAP_NET_ADMIN) &&
@@ -2701,6 +2696,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                else
                        icsk->icsk_user_timeout = msecs_to_jiffies(val);
                break;
+
+       case TCP_FASTOPEN:
+               if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
+                   TCPF_LISTEN)))
+                       err = fastopen_init_queue(sk, val);
+               else
+                       err = -EINVAL;
+               break;
        default:
                err = -ENOPROTOOPT;
                break;
@@ -3514,11 +3517,15 @@ EXPORT_SYMBOL(tcp_cookie_generator);
 
 void tcp_done(struct sock *sk)
 {
+       struct request_sock *req = tcp_sk(sk)->fastopen_rsk;
+
        if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV)
                TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
 
        tcp_set_state(sk, TCP_CLOSE);
        tcp_clear_xmit_timers(sk);
+       if (req != NULL)
+               reqsk_fastopen_remove(sk, req, false);
 
        sk->sk_shutdown = SHUTDOWN_MASK;
 
This page took 0.026893 seconds and 5 git commands to generate.