return flag;
}
-/* RTO occurred, but do not yet enter loss state. Instead, transmit two new
- * segments to see from the next ACKs whether any data was really missing.
- * If the RTO was spurious, new ACKs should arrive.
+/* F-RTO can only be used if these conditions are satisfied:
+ * - there must be some unsent new data
+ * - the advertised window should allow sending it
+ */
+int tcp_use_frto(const struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ return (sysctl_tcp_frto && sk->sk_send_head &&
+ !after(TCP_SKB_CB(sk->sk_send_head)->end_seq,
+ tp->snd_una + tp->snd_wnd));
+}
+
+/* RTO occurred, but do not yet enter Loss state. Instead, defer RTO
+ * recovery a bit and use heuristics in tcp_process_frto() to detect if
+ * the RTO was spurious.
+ *
+ * Do like tcp_enter_loss() would; when RTO expires the second time it
+ * does:
+ * "Reduce ssthresh if it has not yet been made inside this window."
*/
void tcp_enter_frto(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- tp->frto_counter = 1;
-
- if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
+ if ((!tp->frto_counter && icsk->icsk_ca_state <= TCP_CA_Disorder) ||
tp->snd_una == tp->high_seq ||
- (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
+ ((icsk->icsk_ca_state == TCP_CA_Loss || tp->frto_counter) &&
+ !icsk->icsk_retransmits)) {
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tcp_ca_event(sk, CA_EVENT_FRTO);
}
tcp_sync_left_out(tp);
- tcp_set_ca_state(sk, TCP_CA_Open);
+ tcp_set_ca_state(sk, TCP_CA_Disorder);
+ tp->high_seq = tp->snd_nxt;
tp->frto_highmark = tp->snd_nxt;
+ tp->frto_counter = 1;
}
/* Enter Loss state after F-RTO was applied. Dupack arrived after RTO,
* which indicates that we should follow the traditional RTO recovery,
* i.e. mark everything lost and do go-back-N retransmission.
*/
-static void tcp_enter_frto_loss(struct sock *sk)
+static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
}
tcp_sync_left_out(tp);
- tp->snd_cwnd = tp->frto_counter + tcp_packets_in_flight(tp)+1;
+ tp->snd_cwnd = tcp_packets_in_flight(tp) + allowed_segments;
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->undo_marker = 0;
/* E. Check state exit conditions. State can be terminated
* when high_seq is ACKed. */
if (icsk->icsk_ca_state == TCP_CA_Open) {
- if (!sysctl_tcp_frto)
- BUG_TRAP(tp->retrans_out == 0);
+ BUG_TRAP(tp->retrans_out == 0);
tp->retrans_stamp = 0;
} else if (!before(tp->snd_una, tp->high_seq)) {
switch (icsk->icsk_ca_state) {
tcp_moderate_cwnd(tp);
}
-static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
+/* F-RTO spurious RTO detection algorithm (RFC4138)
+ *
+ * F-RTO affects during two new ACKs following RTO (well, almost, see inline
+ * comments). State (ACK number) is kept in frto_counter. When ACK advances
+ * window (but not to or beyond highest sequence sent before RTO):
+ * On First ACK, send two new segments out.
+ * On Second ACK, RTO was likely spurious. Do spurious response (response
+ * algorithm is not part of the F-RTO detection algorithm
+ * given in RFC4138 but can be selected separately).
+ * Otherwise (basically on duplicate ACK), RTO was (likely) caused by a loss
+ * and TCP falls back to conventional RTO recovery.
+ *
+ * Rationale: if the RTO was spurious, new ACKs should arrive from the
+ * original window even after we transmit two new data segments.
+ *
+ * F-RTO is implemented (mainly) in four functions:
+ * - tcp_use_frto() is used to determine if TCP is can use F-RTO
+ * - tcp_enter_frto() prepares TCP state on RTO if F-RTO is used, it is
+ * called when tcp_use_frto() showed green light
+ * - tcp_process_frto() handles incoming ACKs during F-RTO algorithm
+ * - tcp_enter_frto_loss() is called if there is not enough evidence
+ * to prove that the RTO is indeed spurious. It transfers the control
+ * from F-RTO to the conventional RTO recovery
+ */
+static void tcp_process_frto(struct sock *sk, u32 prior_snd_una, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
tcp_sync_left_out(tp);
- if (tp->snd_una == prior_snd_una ||
- !before(tp->snd_una, tp->frto_highmark)) {
- /* RTO was caused by loss, start retransmitting in
- * go-back-N slow start
- */
- tcp_enter_frto_loss(sk);
+ /* Duplicate the behavior from Loss state (fastretrans_alert) */
+ if (flag&FLAG_DATA_ACKED)
+ inet_csk(sk)->icsk_retransmits = 0;
+
+ if (!before(tp->snd_una, tp->frto_highmark)) {
+ tcp_enter_frto_loss(sk, tp->frto_counter + 1);
+ return;
+ }
+
+ /* RFC4138 shortcoming in step 2; should also have case c): ACK isn't
+ * duplicate nor advances window, e.g., opposite dir data, winupdate
+ */
+ if ((tp->snd_una == prior_snd_una) && (flag&FLAG_NOT_DUP) &&
+ !(flag&FLAG_FORWARD_PROGRESS))
+ return;
+
+ if (!(flag&FLAG_DATA_ACKED)) {
+ tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3));
return;
}
if (tp->frto_counter == 1) {
- /* First ACK after RTO advances the window: allow two new
- * segments out.
- */
tp->snd_cwnd = tcp_packets_in_flight(tp) + 2;
- } else {
+ } else /* frto_counter == 2 */ {
tcp_conservative_spur_to_response(tp);
}
- /* F-RTO affects on two new ACKs following RTO.
- * At latest on third ACK the TCP behavior is back to normal.
- */
tp->frto_counter = (tp->frto_counter + 1) % 3;
}
flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
if (tp->frto_counter)
- tcp_process_frto(sk, prior_snd_una);
+ tcp_process_frto(sk, prior_snd_una, flag);
if (tcp_ack_is_dubious(sk, flag)) {
/* Advance CWND, if state allows this. */