tcp: undo on DSACK during recovery

[deliverable/linux.git] / net / ipv4 / tcp_input.c
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index b358e8c986076c314907420e402cb11b367e18ac..907311c9a0121226209a93f381bdd0d6774e5c54 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1255,8 +1255,6 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
  
         if (skb == tp->retransmit_skb_hint)
                 tp->retransmit_skb_hint = prev;
-       if (skb == tp->scoreboard_skb_hint)
-               tp->scoreboard_skb_hint = prev;
         if (skb == tp->lost_skb_hint) {
                 tp->lost_skb_hint = prev;
                 tp->lost_cnt_hint -= tcp_skb_pcount(prev);
@@ -1964,20 +1962,6 @@ static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
         return true;
  }
  
-static inline int tcp_skb_timedout(const struct sock *sk,
-                                  const struct sk_buff *skb)
-{
-       return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
-}
-
-static inline int tcp_head_timedout(const struct sock *sk)
-{
-       const struct tcp_sock *tp = tcp_sk(sk);
-
-       return tp->packets_out &&
-              tcp_skb_timedout(sk, tcp_write_queue_head(sk));
-}
-
  /* Linux NewReno/SACK/FACK/ECN state machine.
   * --------------------------------------
   *
@@ -2084,12 +2068,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
         if (tcp_dupack_heuristics(tp) > tp->reordering)
                 return true;
  
-       /* Trick#3 : when we use RFC2988 timer restart, fast
-        * retransmit can be triggered by timeout of queue head.
-        */
-       if (tcp_is_fack(tp) && tcp_head_timedout(sk))
-               return true;
-
         /* Trick#4: It is still not OK... But will it be useful to delay
          * recovery more?
          */
@@ -2126,44 +2104,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
         return false;
  }
  
-/* New heuristics: it is possible only after we switched to restart timer
- * each time when something is ACKed. Hence, we can detect timed out packets
- * during fast retransmit without falling to slow start.
- *
- * Usefulness of this as is very questionable, since we should know which of
- * the segments is the next to timeout which is relatively expensive to find
- * in general case unless we add some data structure just for that. The
- * current approach certainly won't find the right one too often and when it
- * finally does find _something_ it usually marks large part of the window
- * right away (because a retransmission with a larger timestamp blocks the
- * loop from advancing). -ij
- */
-static void tcp_timeout_skbs(struct sock *sk)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-       struct sk_buff *skb;
-
-       if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
-               return;
-
-       skb = tp->scoreboard_skb_hint;
-       if (tp->scoreboard_skb_hint == NULL)
-               skb = tcp_write_queue_head(sk);
-
-       tcp_for_write_queue_from(skb, sk) {
-               if (skb == tcp_send_head(sk))
-                       break;
-               if (!tcp_skb_timedout(sk, skb))
-                       break;
-
-               tcp_skb_mark_lost(tp, skb);
-       }
-
-       tp->scoreboard_skb_hint = skb;
-
-       tcp_verify_left_out(tp);
-}
-
  /* Detect loss in event "A" above by marking head of queue up as lost.
   * For FACK or non-SACK(Reno) senders, the first "packets" number of segments
   * are considered lost. For RFC3517 SACK, a segment is considered lost if it
@@ -2249,8 +2189,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
                 else if (fast_rexmit)
                         tcp_mark_head_lost(sk, 1, 1);
         }
-
-       tcp_timeout_skbs(sk);
  }
  
  /* CWND moderation, preventing bursts due to too big ACKs
@@ -2305,10 +2243,22 @@ static void DBGUNDO(struct sock *sk, const char *msg)
  #define DBGUNDO(x...) do { } while (0)
  #endif
  
-static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
+static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
+       if (unmark_loss) {
+               struct sk_buff *skb;
+
+               tcp_for_write_queue(skb, sk) {
+                       if (skb == tcp_send_head(sk))
+                               break;
+                       TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+               }
+               tp->lost_out = 0;
+               tcp_clear_all_retrans_hints(tp);
+       }
+
         if (tp->prior_ssthresh) {
                 const struct inet_connection_sock *icsk = inet_csk(sk);
  
@@ -2317,7 +2267,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
                 else
                         tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
  
-               if (undo_ssthresh && tp->prior_ssthresh > tp->snd_ssthresh) {
+               if (tp->prior_ssthresh > tp->snd_ssthresh) {
                         tp->snd_ssthresh = tp->prior_ssthresh;
                         TCP_ECN_withdraw_cwr(tp);
                 }
@@ -2325,6 +2275,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh)
                 tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh);
         }
         tp->snd_cwnd_stamp = tcp_time_stamp;
+       tp->undo_marker = 0;
  }
  
  static inline bool tcp_may_undo(const struct tcp_sock *tp)
@@ -2344,14 +2295,13 @@ static bool tcp_try_undo_recovery(struct sock *sk)
                  * or our original transmission succeeded.
                  */
                 DBGUNDO(sk, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans");
-               tcp_undo_cwr(sk, true);
+               tcp_undo_cwnd_reduction(sk, false);
                 if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss)
                         mib_idx = LINUX_MIB_TCPLOSSUNDO;
                 else
                         mib_idx = LINUX_MIB_TCPFULLUNDO;
  
                 NET_INC_STATS_BH(sock_net(sk), mib_idx);
-               tp->undo_marker = 0;
         }
         if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
                 /* Hold old state until something *above* high_seq
@@ -2365,16 +2315,17 @@ static bool tcp_try_undo_recovery(struct sock *sk)
  }
  
  /* Try to undo cwnd reduction, because D-SACKs acked all retransmitted data */
-static void tcp_try_undo_dsack(struct sock *sk)
+static bool tcp_try_undo_dsack(struct sock *sk)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
         if (tp->undo_marker && !tp->undo_retrans) {
                 DBGUNDO(sk, "D-SACK");
-               tcp_undo_cwr(sk, true);
-               tp->undo_marker = 0;
+               tcp_undo_cwnd_reduction(sk, false);
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDSACKUNDO);
+               return true;
         }
+       return false;
  }
  
  /* We can clear retrans_stamp when there are no retransmissions in the
@@ -2406,60 +2357,20 @@ static bool tcp_any_retrans_done(const struct sock *sk)
         return false;
  }
  
-/* Undo during fast recovery after partial ACK. */
-
-static int tcp_try_undo_partial(struct sock *sk, int acked)
-{
-       struct tcp_sock *tp = tcp_sk(sk);
-       /* Partial ACK arrived. Force Hoe's retransmit. */
-       int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
-
-       if (tcp_may_undo(tp)) {
-               /* Plain luck! Hole if filled with delayed
-                * packet, rather than with a retransmit.
-                */
-               if (!tcp_any_retrans_done(sk))
-                       tp->retrans_stamp = 0;
-
-               tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
-
-               DBGUNDO(sk, "Hoe");
-               tcp_undo_cwr(sk, false);
-               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
-
-               /* So... Do not make Hoe's retransmit yet.
-                * If the first packet was delayed, the rest
-                * ones are most probably delayed as well.
-                */
-               failed = 0;
-       }
-       return failed;
-}
-
  /* Undo during loss recovery after partial ACK or using F-RTO. */
  static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
         if (frto_undo || tcp_may_undo(tp)) {
-               struct sk_buff *skb;
-               tcp_for_write_queue(skb, sk) {
-                       if (skb == tcp_send_head(sk))
-                               break;
-                       TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
-               }
-
-               tcp_clear_all_retrans_hints(tp);
+               tcp_undo_cwnd_reduction(sk, true);
  
                 DBGUNDO(sk, "partial loss");
-               tp->lost_out = 0;
-               tcp_undo_cwr(sk, true);
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSUNDO);
                 if (frto_undo)
                         NET_INC_STATS_BH(sock_net(sk),
                                          LINUX_MIB_TCPSPURIOUSRTOS);
                 inet_csk(sk)->icsk_retransmits = 0;
-               tp->undo_marker = 0;
                 if (frto_undo || tcp_is_sack(tp))
                         tcp_set_ca_state(sk, TCP_CA_Open);
                 return true;
@@ -2492,12 +2403,14 @@ static void tcp_init_cwnd_reduction(struct sock *sk, const bool set_ssthresh)
         TCP_ECN_queue_cwr(tp);
  }
  
-static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
                                int fast_rexmit)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         int sndcnt = 0;
         int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
+       int newly_acked_sacked = prior_unsacked -
+                                (tp->packets_out - tp->sacked_out);
  
         tp->prr_delivered += newly_acked_sacked;
         if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
@@ -2554,7 +2467,7 @@ static void tcp_try_keep_open(struct sock *sk)
         }
  }
  
-static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
+static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
@@ -2571,7 +2484,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, int newly_acked_sacked)
                 if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
                         tcp_moderate_cwnd(tp);
         } else {
-               tcp_cwnd_reduction(sk, newly_acked_sacked, 0);
+               tcp_cwnd_reduction(sk, prior_unsacked, 0);
         }
  }
  
@@ -2729,6 +2642,40 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
         tcp_xmit_retransmit_queue(sk);
  }
  
+/* Undo during fast recovery after partial ACK. */
+static bool tcp_try_undo_partial(struct sock *sk, const int acked,
+                                const int prior_unsacked)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if (tp->undo_marker && tcp_packet_delayed(tp)) {
+               /* Plain luck! Hole is filled with delayed
+                * packet, rather than with a retransmit.
+                */
+               tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
+
+               /* We are getting evidence that the reordering degree is higher
+                * than we realized. If there are no retransmits out then we
+                * can undo. Otherwise we clock out new packets but do not
+                * mark more packets lost or retransmit more.
+                */
+               if (tp->retrans_out) {
+                       tcp_cwnd_reduction(sk, prior_unsacked, 0);
+                       return true;
+               }
+
+               if (!tcp_any_retrans_done(sk))
+                       tp->retrans_stamp = 0;
+
+               DBGUNDO(sk, "partial recovery");
+               tcp_undo_cwnd_reduction(sk, true);
+               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPARTIALUNDO);
+               tcp_try_keep_open(sk);
+               return true;
+       }
+       return false;
+}
+
  /* Process an event, which can update packets-in-flight not trivially.
   * Main goal of this function is to calculate new estimate for left_out,
   * taking into account both packets sitting in receiver's buffer and
@@ -2740,15 +2687,14 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
   * It does _not_ decide what to send, it is made in function
   * tcp_xmit_retransmit_queue().
   */
-static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
-                                 int prior_sacked, bool is_dupack,
-                                 int flag)
+static void tcp_fastretrans_alert(struct sock *sk, const int acked,
+                                 const int prior_unsacked,
+                                 bool is_dupack, int flag)
  {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
-       int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
+       bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
                                     (tcp_fackets_out(tp) > tp->reordering));
-       int newly_acked_sacked = 0;
         int fast_rexmit = 0;
  
         if (WARN_ON(!tp->packets_out && tp->sacked_out))
@@ -2800,9 +2746,17 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                 if (!(flag & FLAG_SND_UNA_ADVANCED)) {
                         if (tcp_is_reno(tp) && is_dupack)
                                 tcp_add_reno_sack(sk);
-               } else
-                       do_lost = tcp_try_undo_partial(sk, pkts_acked);
-               newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
+               } else {
+                       if (tcp_try_undo_partial(sk, acked, prior_unsacked))
+                               return;
+                       /* Partial ACK arrived. Force fast retransmit. */
+                       do_lost = tcp_is_reno(tp) ||
+                                 tcp_fackets_out(tp) > tp->reordering;
+               }
+               if (tcp_try_undo_dsack(sk)) {
+                       tcp_try_keep_open(sk);
+                       return;
+               }
                 break;
         case TCP_CA_Loss:
                 tcp_process_loss(sk, flag, is_dupack);
@@ -2816,13 +2770,12 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                         if (is_dupack)
                                 tcp_add_reno_sack(sk);
                 }
-               newly_acked_sacked = pkts_acked + tp->sacked_out - prior_sacked;
  
                 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
                         tcp_try_undo_dsack(sk);
  
                 if (!tcp_time_to_recover(sk, flag)) {
-                       tcp_try_to_open(sk, flag, newly_acked_sacked);
+                       tcp_try_to_open(sk, flag, prior_unsacked);
                         return;
                 }
  
@@ -2842,9 +2795,9 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
                 fast_rexmit = 1;
         }
  
-       if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
+       if (do_lost)
                 tcp_update_scoreboard(sk, fast_rexmit);
-       tcp_cwnd_reduction(sk, newly_acked_sacked, fast_rexmit);
+       tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
         tcp_xmit_retransmit_queue(sk);
  }
  
@@ -3075,7 +3028,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
  
                 tcp_unlink_write_queue(skb, sk);
                 sk_wmem_free_skb(sk, skb);
-               tp->scoreboard_skb_hint = NULL;
                 if (skb == tp->retransmit_skb_hint)
                         tp->retransmit_skb_hint = NULL;
                 if (skb == tp->lost_skb_hint)
@@ -3328,9 +3280,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         bool is_dupack = false;
         u32 prior_in_flight;
         u32 prior_fackets;
-       int prior_packets;
-       int prior_sacked = tp->sacked_out;
-       int pkts_acked = 0;
+       int prior_packets = tp->packets_out;
+       const int prior_unsacked = tp->packets_out - tp->sacked_out;
+       int acked = 0; /* Number of packets newly acked */
  
         /* If the ack is older than previous acks
          * then we can probably ignore it.
@@ -3401,21 +3353,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         sk->sk_err_soft = 0;
         icsk->icsk_probes_out = 0;
         tp->rcv_tstamp = tcp_time_stamp;
-       prior_packets = tp->packets_out;
         if (!prior_packets)
                 goto no_queue;
  
         /* See if we can take anything off of the retransmit queue. */
+       acked = tp->packets_out;
         flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
-
-       pkts_acked = prior_packets - tp->packets_out;
+       acked -= tp->packets_out;
  
         if (tcp_ack_is_dubious(sk, flag)) {
                 /* Advance CWND, if state allows this. */
                 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
                         tcp_cong_avoid(sk, ack, prior_in_flight);
                 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-               tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
+               tcp_fastretrans_alert(sk, acked, prior_unsacked,
                                       is_dupack, flag);
         } else {
                 if (flag & FLAG_DATA_ACKED)
@@ -3438,7 +3389,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
  no_queue:
         /* If data was DSACKed, see if we can undo a cwnd reduction. */
         if (flag & FLAG_DSACKING_ACK)
-               tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
+               tcp_fastretrans_alert(sk, acked, prior_unsacked,
                                       is_dupack, flag);
         /* If this ack opens up a zero window, clear backoff.  It was
          * being used to time the probes, and is probably far higher than
@@ -3461,7 +3412,7 @@ old_ack:
          */
         if (TCP_SKB_CB(skb)->sacked) {
                 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
-               tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
+               tcp_fastretrans_alert(sk, acked, prior_unsacked,
                                       is_dupack, flag);
         }
  
@@ -5596,6 +5547,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct request_sock *req;
         int queued = 0;
+       bool acceptable;
  
         tp->rx_opt.saw_tstamp = 0;
  
@@ -5666,157 +5618,147 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                 return 0;
  
         /* step 5: check the ACK field */
-       if (true) {
-               int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
-                                                 FLAG_UPDATE_TS_RECENT) > 0;
-
-               switch (sk->sk_state) {
-               case TCP_SYN_RECV:
-                       if (acceptable) {
-                               /* Once we leave TCP_SYN_RECV, we no longer
-                                * need req so release it.
-                                */
-                               if (req) {
-                                       tcp_synack_rtt_meas(sk, req);
-                                       tp->total_retrans = req->num_retrans;
-
-                                       reqsk_fastopen_remove(sk, req, false);
-                               } else {
-                                       /* Make sure socket is routed, for
-                                        * correct metrics.
-                                        */
-                                       icsk->icsk_af_ops->rebuild_header(sk);
-                                       tcp_init_congestion_control(sk);
+       acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
+                                     FLAG_UPDATE_TS_RECENT) > 0;
  
-                                       tcp_mtup_init(sk);
-                                       tcp_init_buffer_space(sk);
-                                       tp->copied_seq = tp->rcv_nxt;
-                               }
-                               smp_mb();
-                               tcp_set_state(sk, TCP_ESTABLISHED);
-                               sk->sk_state_change(sk);
-
-                               /* Note, that this wakeup is only for marginal
-                                * crossed SYN case. Passively open sockets
-                                * are not waked up, because sk->sk_sleep ==
-                                * NULL and sk->sk_socket == NULL.
-                                */
-                               if (sk->sk_socket)
-                                       sk_wake_async(sk,
-                                                     SOCK_WAKE_IO, POLL_OUT);
-
-                               tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
-                               tp->snd_wnd = ntohs(th->window) <<
-                                             tp->rx_opt.snd_wscale;
-                               tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
-
-                               if (tp->rx_opt.tstamp_ok)
-                                       tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
-
-                               if (req) {
-                                       /* Re-arm the timer because data may
-                                        * have been sent out. This is similar
-                                        * to the regular data transmission case
-                                        * when new data has just been ack'ed.
-                                        *
-                                        * (TFO) - we could try to be more
-                                        * aggressive and retranmitting any data
-                                        * sooner based on when they were sent
-                                        * out.
-                                        */
-                                       tcp_rearm_rto(sk);
-                               } else
-                                       tcp_init_metrics(sk);
+       switch (sk->sk_state) {
+       case TCP_SYN_RECV:
+               if (!acceptable)
+                       return 1;
  
-                               /* Prevent spurious tcp_cwnd_restart() on
-                                * first data packet.
-                                */
-                               tp->lsndtime = tcp_time_stamp;
+               /* Once we leave TCP_SYN_RECV, we no longer need req
+                * so release it.
+                */
+               if (req) {
+                       tcp_synack_rtt_meas(sk, req);
+                       tp->total_retrans = req->num_retrans;
  
-                               tcp_initialize_rcv_mss(sk);
-                               tcp_fast_path_on(tp);
-                       } else {
-                               return 1;
-                       }
-                       break;
+                       reqsk_fastopen_remove(sk, req, false);
+               } else {
+                       /* Make sure socket is routed, for correct metrics. */
+                       icsk->icsk_af_ops->rebuild_header(sk);
+                       tcp_init_congestion_control(sk);
+
+                       tcp_mtup_init(sk);
+                       tcp_init_buffer_space(sk);
+                       tp->copied_seq = tp->rcv_nxt;
+               }
+               smp_mb();
+               tcp_set_state(sk, TCP_ESTABLISHED);
+               sk->sk_state_change(sk);
+
+               /* Note that this wakeup is only for the marginal crossed SYN
+                * case. Passively open sockets are not woken up, because
+                * sk->sk_sleep == NULL and sk->sk_socket == NULL.
+                */
+               if (sk->sk_socket)
+                       sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
+
+               tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
+               tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
+               tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
+
+               if (tp->rx_opt.tstamp_ok)
+                       tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
  
-               case TCP_FIN_WAIT1:
-                       /* If we enter the TCP_FIN_WAIT1 state and we are a
-                        * Fast Open socket and this is the first acceptable
-                        * ACK we have received, this would have acknowledged
-                        * our SYNACK so stop the SYNACK timer.
+               if (req) {
+                       /* Re-arm the timer because data may have been sent out.
+                        * This is similar to the regular data transmission case
+                        * when new data has just been ack'ed.
+                        *
+                        * (TFO) - we could try to be more aggressive and
+                        * retransmit any data sooner based on when they
+                        * were sent out.
                          */
-                       if (req != NULL) {
-                               /* Return RST if ack_seq is invalid.
-                                * Note that RFC793 only says to generate a
-                                * DUPACK for it but for TCP Fast Open it seems
-                                * better to treat this case like TCP_SYN_RECV
-                                * above.
-                                */
-                               if (!acceptable)
-                                       return 1;
-                               /* We no longer need the request sock. */
-                               reqsk_fastopen_remove(sk, req, false);
-                               tcp_rearm_rto(sk);
-                       }
-                       if (tp->snd_una == tp->write_seq) {
-                               struct dst_entry *dst;
-
-                               tcp_set_state(sk, TCP_FIN_WAIT2);
-                               sk->sk_shutdown |= SEND_SHUTDOWN;
-
-                               dst = __sk_dst_get(sk);
-                               if (dst)
-                                       dst_confirm(dst);
-
-                               if (!sock_flag(sk, SOCK_DEAD))
-                                       /* Wake up lingering close() */
-                                       sk->sk_state_change(sk);
-                               else {
-                                       int tmo;
-
-                                       if (tp->linger2 < 0 ||
-                                           (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
-                                            after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
-                                               tcp_done(sk);
-                                               NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
-                                               return 1;
-                                       }
+                       tcp_rearm_rto(sk);
+               } else
+                       tcp_init_metrics(sk);
  
-                                       tmo = tcp_fin_time(sk);
-                                       if (tmo > TCP_TIMEWAIT_LEN) {
-                                               inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
-                                       } else if (th->fin || sock_owned_by_user(sk)) {
-                                               /* Bad case. We could lose such FIN otherwise.
-                                                * It is not a big problem, but it looks confusing
-                                                * and not so rare event. We still can lose it now,
-                                                * if it spins in bh_lock_sock(), but it is really
-                                                * marginal case.
-                                                */
-                                               inet_csk_reset_keepalive_timer(sk, tmo);
-                                       } else {
-                                               tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
-                                               goto discard;
-                                       }
-                               }
-                       }
-                       break;
+               /* Prevent spurious tcp_cwnd_restart() on first data packet */
+               tp->lsndtime = tcp_time_stamp;
  
-               case TCP_CLOSING:
-                       if (tp->snd_una == tp->write_seq) {
-                               tcp_time_wait(sk, TCP_TIME_WAIT, 0);
-                               goto discard;
-                       }
+               tcp_initialize_rcv_mss(sk);
+               tcp_fast_path_on(tp);
+               break;
+
+       case TCP_FIN_WAIT1: {
+               struct dst_entry *dst;
+               int tmo;
+
+               /* If we enter the TCP_FIN_WAIT1 state and we are a
+                * Fast Open socket and this is the first acceptable
+                * ACK we have received, this would have acknowledged
+                * our SYNACK so stop the SYNACK timer.
+                */
+               if (req != NULL) {
+                       /* Return RST if ack_seq is invalid.
+                        * Note that RFC793 only says to generate a
+                        * DUPACK for it but for TCP Fast Open it seems
+                        * better to treat this case like TCP_SYN_RECV
+                        * above.
+                        */
+                       if (!acceptable)
+                               return 1;
+                       /* We no longer need the request sock. */
+                       reqsk_fastopen_remove(sk, req, false);
+                       tcp_rearm_rto(sk);
+               }
+               if (tp->snd_una != tp->write_seq)
                         break;
  
-               case TCP_LAST_ACK:
-                       if (tp->snd_una == tp->write_seq) {
-                               tcp_update_metrics(sk);
-                               tcp_done(sk);
-                               goto discard;
-                       }
+               tcp_set_state(sk, TCP_FIN_WAIT2);
+               sk->sk_shutdown |= SEND_SHUTDOWN;
+
+               dst = __sk_dst_get(sk);
+               if (dst)
+                       dst_confirm(dst);
+
+               if (!sock_flag(sk, SOCK_DEAD)) {
+                       /* Wake up lingering close() */
+                       sk->sk_state_change(sk);
                         break;
                 }
+
+               if (tp->linger2 < 0 ||
+                   (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
+                    after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
+                       tcp_done(sk);
+                       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
+                       return 1;
+               }
+
+               tmo = tcp_fin_time(sk);
+               if (tmo > TCP_TIMEWAIT_LEN) {
+                       inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
+               } else if (th->fin || sock_owned_by_user(sk)) {
+                       /* Bad case. We could lose such FIN otherwise.
+                        * It is not a big problem, but it looks confusing
+                        * and not so rare event. We still can lose it now,
+                        * if it spins in bh_lock_sock(), but it is really
+                        * marginal case.
+                        */
+                       inet_csk_reset_keepalive_timer(sk, tmo);
+               } else {
+                       tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
+                       goto discard;
+               }
+               break;
+       }
+
+       case TCP_CLOSING:
+               if (tp->snd_una == tp->write_seq) {
+                       tcp_time_wait(sk, TCP_TIME_WAIT, 0);
+                       goto discard;
+               }
+               break;
+
+       case TCP_LAST_ACK:
+               if (tp->snd_una == tp->write_seq) {
+                       tcp_update_metrics(sk);
+                       tcp_done(sk);
+                       goto discard;
+               }
+               break;
         }
  
         /* step 6: check the URG bit */