Merge branch 'tcp-eor'

author David S. Miller <davem@davemloft.net>
Thu, 28 Apr 2016 20:14:20 +0000 (16:14 -0400)
committer David S. Miller <davem@davemloft.net>
Thu, 28 Apr 2016 20:14:20 +0000 (16:14 -0400)
diff --git a/include/net/tcp.h b/include/net/tcp.h

index 992f317c1abed4430893794906e2512ba5b34059..24ec80483805fb76e58b917eb9ceefb1025b1c86 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -761,7 +761,8 @@ struct tcp_skb_cb {
  
         __u8            ip_dsfield;     /* IPv4 tos or IPv6 dsfield     */
         __u8            txstamp_ack:1,  /* Record TX timestamp for ack? */
-                       unused:7;
+                       eor:1,          /* Is skb MSG_EOR marked? */
+                       unused:6;
         __u32           ack_seq;        /* Sequence number ACK'd        */
         union {
                 struct inet_skb_parm    h4;
@@ -808,6 +809,11 @@ static inline int tcp_skb_mss(const struct sk_buff *skb)
         return TCP_SKB_CB(skb)->tcp_gso_size;
  }
  
+static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
+{
+       return likely(!TCP_SKB_CB(skb)->eor);
+}
+
  /* Events passed to congestion control interface */
  enum tcp_ca_event {
         CA_EVENT_TX_START,      /* first transmit when no packets in flight */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c

index 91993782a94797f820f81160dd3b799c78ac398f..cb4d1cabb42c4fd89c72192c97ac376d1330aeca 100644 (file)
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -909,7 +909,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
                 int copy, i;
                 bool can_coalesce;
  
-               if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
+               if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+                   !tcp_skb_can_collapse_to(skb)) {
  new_segment:
                         if (!sk_stream_memory_free(sk))
                                 goto wait_for_sndbuf;
@@ -1157,7 +1158,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
                         copy = max - skb->len;
                 }
  
-               if (copy <= 0) {
+               if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
  new_segment:
                         /* Allocate new segment. If the interface is SG,
                          * allocate skb fitting to single page.
@@ -1251,6 +1252,8 @@ new_segment:
                 copied += copy;
                 if (!msg_data_left(msg)) {
                         tcp_tx_timestamp(sk, sockc.tsflags, skb);
+                       if (unlikely(flags & MSG_EOR))
+                               TCP_SKB_CB(skb)->eor = 1;
                         goto out;
                 }
  
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 70c370b937621b4e5144fe3fd47bb1bddc9fa6bd..1fb19c91e0919b7a9a51e8cb928993a1f5ff1c7b 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1303,6 +1303,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
         }
  
         TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+       TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor;
         if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
                 TCP_SKB_CB(prev)->end_seq++;
  
@@ -1368,6 +1369,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
         if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
                 goto fallback;
  
+       if (!tcp_skb_can_collapse_to(prev))
+               goto fallback;
+
         in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
                   !before(end_seq, TCP_SKB_CB(skb)->end_seq);
  
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c

index b3a31b4df57c6c2e5f325c6c3e99440986070444..1a487ff95d4c0572737e4f972666229f12ef1e08 100644 (file)
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1134,6 +1134,12 @@ static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
         }
  }
  
+static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
+{
+       TCP_SKB_CB(skb2)->eor = TCP_SKB_CB(skb)->eor;
+       TCP_SKB_CB(skb)->eor = 0;
+}
+
  /* Function to create two new TCP segments.  Shrinks the given segment
   * to the specified size and appends a new segment with the rest of the
   * packet to the list.  This won't be called frequently, I hope.
@@ -1179,6 +1185,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
         TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
         TCP_SKB_CB(buff)->tcp_flags = flags;
         TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
+       tcp_skb_fragment_eor(skb, buff);
  
         if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
                 /* Copy and checksum data tail into the new buffer. */
@@ -1739,6 +1746,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
         /* This packet was never sent out yet, so no SACK bits. */
         TCP_SKB_CB(buff)->sacked = 0;
  
+       tcp_skb_fragment_eor(skb, buff);
+
         buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
         skb_split(skb, buff, len);
         tcp_fragment_tstamp(skb, buff);
@@ -2499,6 +2508,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
          * packet counting does not break.
          */
         TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
+       TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor;
  
         /* changed transmit queue under us so clear hints */
         tcp_clear_retrans_hints_partial(tp);
@@ -2550,6 +2560,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
                 if (!tcp_can_collapse(sk, skb))
                         break;
  
+               if (!tcp_skb_can_collapse_to(to))
+                       break;
+
                 space -= skb->len;
  
                 if (first) {
author	David S. Miller <davem@davemloft.net>
	Thu, 28 Apr 2016 20:14:20 +0000 (16:14 -0400)
committer	David S. Miller <davem@davemloft.net>
	Thu, 28 Apr 2016 20:14:20 +0000 (16:14 -0400)
include/net/tcp.h		patch | blob | blame | history
net/ipv4/tcp.c		patch | blob | blame | history
net/ipv4/tcp_input.c		patch | blob | blame | history
net/ipv4/tcp_output.c		patch | blob | blame | history