X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=net%2Fipv4%2Ftcp_input.c;h=26c936930e927fe42ba64da212492b522885e424;hb=f5184d267c1aedb9b7a8cc44e08ff6b8d382c3b5;hp=3c6c7d7a7b0abbeaed332d6c7b4aea15d1302ffb;hpb=a7d632b6b4ad1c92746ed409e41f9dc571ec04e2;p=deliverable%2Flinux.git

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3c6c7d7a7b0a..26c936930e92 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -66,6 +66,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -113,8 +114,6 @@ int sysctl_tcp_abc __read_mostly;
 #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
 #define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)

-#define IsSackFrto() (sysctl_tcp_frto == 0x2)
-
 #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
 #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
@@ -605,7 +604,7 @@ static u32 tcp_rto_min(struct sock *sk)
 	u32 rto_min = TCP_RTO_MIN;

 	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
-		rto_min = dst->metrics[RTAX_RTO_MIN - 1];
+		rto_min = dst_metric(dst, RTAX_RTO_MIN);
 	return rto_min;
 }
@@ -769,7 +768,7 @@ void tcp_update_metrics(struct sock *sk)
 			dst->metrics[RTAX_RTTVAR - 1] = m;
 		else
 			dst->metrics[RTAX_RTTVAR-1] -=
-				(dst->metrics[RTAX_RTTVAR-1] - m)>>2;
+				(dst_metric(dst, RTAX_RTTVAR) - m)>>2;
 	}

 	if (tp->snd_ssthresh >= 0xFFFF) {
@@ -788,21 +787,21 @@ void tcp_update_metrics(struct sock *sk)
 			dst->metrics[RTAX_SSTHRESH-1] =
 				max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
 		if (!dst_metric_locked(dst, RTAX_CWND))
-			dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1;
+			dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
 	} else {
 		/* Else slow start did not finish, cwnd is non-sense,
 		   ssthresh may be also invalid.
 		 */
 		if (!dst_metric_locked(dst, RTAX_CWND))
-			dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1;
-		if (dst->metrics[RTAX_SSTHRESH-1] &&
+			dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
+		if (dst_metric(dst, RTAX_SSTHRESH) &&
 		    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
-		    tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1])
+		    tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
 			dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
 	}

 	if (!dst_metric_locked(dst, RTAX_REORDERING)) {
-		if (dst->metrics[RTAX_REORDERING-1] < tp->reordering &&
+		if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
 		    tp->reordering != sysctl_tcp_reordering)
 			dst->metrics[RTAX_REORDERING-1] = tp->reordering;
 	}
@@ -1172,8 +1171,8 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
 			   struct tcp_sack_block_wire *sp, int num_sacks,
 			   u32 prior_snd_una)
 {
-	u32 start_seq_0 = ntohl(get_unaligned(&sp[0].start_seq));
-	u32 end_seq_0 = ntohl(get_unaligned(&sp[0].end_seq));
+	u32 start_seq_0 = get_unaligned_be32(&sp[0].start_seq);
+	u32 end_seq_0 = get_unaligned_be32(&sp[0].end_seq);
 	int dup_sack = 0;

 	if (before(start_seq_0, TCP_SKB_CB(ack_skb)->ack_seq)) {
@@ -1181,8 +1180,8 @@ static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
 		tcp_dsack_seen(tp);
 		NET_INC_STATS_BH(LINUX_MIB_TCPDSACKRECV);
 	} else if (num_sacks > 1) {
-		u32 end_seq_1 = ntohl(get_unaligned(&sp[1].end_seq));
-		u32 start_seq_1 = ntohl(get_unaligned(&sp[1].start_seq));
+		u32 end_seq_1 = get_unaligned_be32(&sp[1].end_seq);
+		u32 start_seq_1 = get_unaligned_be32(&sp[1].start_seq);

 		if (!after(end_seq_0, end_seq_1) &&
 		    !before(start_seq_0, start_seq_1)) {
@@ -1453,8 +1452,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
 	for (i = 0; i < num_sacks; i++) {
 		int dup_sack = !i && found_dup_sack;

-		sp[used_sacks].start_seq = ntohl(get_unaligned(&sp_wire[i].start_seq));
-		sp[used_sacks].end_seq = ntohl(get_unaligned(&sp_wire[i].end_seq));
+		sp[used_sacks].start_seq = get_unaligned_be32(&sp_wire[i].start_seq);
+		sp[used_sacks].end_seq = get_unaligned_be32(&sp_wire[i].end_seq);

 		if (!tcp_is_sackblock_valid(tp, dup_sack,
 					    sp[used_sacks].start_seq,
@@ -1685,6 +1684,11 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
 	tp->sacked_out = 0;
 }

+static int tcp_is_sackfrto(const struct tcp_sock *tp)
+{
+	return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
+}
+
 /* F-RTO can only be used if TCP has never retransmitted anything other than
  * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
  */
@@ -1701,7 +1705,7 @@ int tcp_use_frto(struct sock *sk)
 	if (icsk->icsk_mtup.probe_size)
 		return 0;

-	if (IsSackFrto())
+	if (tcp_is_sackfrto(tp))
 		return 1;

 	/* Avoid expensive walking of rexmit queue if possible */
@@ -1791,7 +1795,7 @@ void tcp_enter_frto(struct sock *sk)
 	/* Earlier loss recovery underway (see RFC4138; Appendix B).
 	 * The last condition is necessary at least in tp->frto_counter case.
 	 */
-	if (IsSackFrto() && (tp->frto_counter ||
+	if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
 	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
 	    after(tp->high_seq, tp->snd_una)) {
 		tp->frto_highmark = tp->high_seq;
@@ -2298,7 +2302,7 @@ static inline int tcp_packet_delayed(struct tcp_sock *tp)
 {
 	return !tp->retrans_stamp ||
 		(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
-		 (__s32)(tp->rx_opt.rcv_tsecr - tp->retrans_stamp) < 0);
+		 before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
 }

 /* Undo procedures. */
@@ -2309,12 +2313,25 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct inet_sock *inet = inet_sk(sk);

-	printk(KERN_DEBUG "Undo %s " NIPQUAD_FMT "/%u c%u l%u ss%u/%u p%u\n",
-	       msg,
-	       NIPQUAD(inet->daddr), ntohs(inet->dport),
-	       tp->snd_cwnd, tcp_left_out(tp),
-	       tp->snd_ssthresh, tp->prior_ssthresh,
-	       tp->packets_out);
+	if (sk->sk_family == AF_INET) {
+		printk(KERN_DEBUG "Undo %s " NIPQUAD_FMT "/%u c%u l%u ss%u/%u p%u\n",
+		       msg,
+		       NIPQUAD(inet->daddr), ntohs(inet->dport),
+		       tp->snd_cwnd, tcp_left_out(tp),
+		       tp->snd_ssthresh, tp->prior_ssthresh,
+		       tp->packets_out);
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	else if (sk->sk_family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+		printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n",
+		       msg,
+		       NIP6(np->daddr), ntohs(inet->dport),
+		       tp->snd_cwnd, tcp_left_out(tp),
+		       tp->snd_ssthresh, tp->prior_ssthresh,
+		       tp->packets_out);
+	}
+#endif
 }
 #else
 #define DBGUNDO(x...) do { } while (0)
@@ -3110,7 +3127,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
 		return 1;
 	}

-	if (!IsSackFrto() || tcp_is_reno(tp)) {
+	if (!tcp_is_sackfrto(tp)) {
 		/* RFC4138 shortcoming in step 2; should also have case c):
 		 * ACK isn't duplicate nor advances window, e.g., opposite dir
 		 * data, winupdate
@@ -3327,7 +3344,7 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 		switch (opcode) {
 		case TCPOPT_MSS:
 			if (opsize == TCPOLEN_MSS && th->syn && !estab) {
-				u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
+				u16 in_mss = get_unaligned_be16(ptr);
 				if (in_mss) {
 					if (opt_rx->user_mss &&
 					    opt_rx->user_mss < in_mss)
@@ -3356,8 +3373,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 				    ((estab && opt_rx->tstamp_ok) ||
 				     (!estab && sysctl_tcp_timestamps))) {
 					opt_rx->saw_tstamp = 1;
-					opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
-					opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
+					opt_rx->rcv_tsval = get_unaligned_be32(ptr);
+					opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
 				}
 				break;
 			case TCPOPT_SACK_PERM:
@@ -3841,8 +3858,28 @@ static void tcp_ofo_queue(struct sock *sk)
 	}
 }

+static int tcp_prune_ofo_queue(struct sock *sk);
 static int tcp_prune_queue(struct sock *sk);

+static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	    !sk_rmem_schedule(sk, size)) {
+
+		if (tcp_prune_queue(sk) < 0)
+			return -1;
+
+		if (!sk_rmem_schedule(sk, size)) {
+			if (!tcp_prune_ofo_queue(sk))
+				return -1;
+
+			if (!sk_rmem_schedule(sk, size))
+				return -1;
+		}
+	}
+	return 0;
+}
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = tcp_hdr(skb);
@@ -3892,12 +3929,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (eaten <= 0) {
 queue_and_out:
 		if (eaten < 0 &&
-		    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-		     !sk_rmem_schedule(sk, skb->truesize))) {
-			if (tcp_prune_queue(sk) < 0 ||
-			    !sk_rmem_schedule(sk, skb->truesize))
-				goto drop;
-		}
+		    tcp_try_rmem_schedule(sk, skb->truesize))
+			goto drop;
+
 		skb_set_owner_r(skb, sk);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 	}
@@ -3966,12 +4000,8 @@ drop:

 	TCP_ECN_check_ce(tp, skb);

-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    !sk_rmem_schedule(sk, skb->truesize)) {
-		if (tcp_prune_queue(sk) < 0 ||
-		    !sk_rmem_schedule(sk, skb->truesize))
-			goto drop;
-	}
+	if (tcp_try_rmem_schedule(sk, skb->truesize))
+		goto drop;

 	/* Disable header prediction. */
 	tp->pred_flags = 0;
@@ -4198,6 +4228,32 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 	}
 }

+/*
+ * Purge the out-of-order queue.
+ * Return true if queue was pruned.
+ */
+static int tcp_prune_ofo_queue(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int res = 0;
+
+	if (!skb_queue_empty(&tp->out_of_order_queue)) {
+		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+		__skb_queue_purge(&tp->out_of_order_queue);
+
+		/* Reset SACK state. A conforming SACK implementation will
+		 * do the same at a timeout based retransmit. When a connection
+		 * is in a sad state like this, we care only about integrity
+		 * of the connection not performance.
+		 */
+		if (tp->rx_opt.sack_ok)
+			tcp_sack_reset(&tp->rx_opt);
+		sk_mem_reclaim(sk);
+		res = 1;
+	}
+	return res;
+}
+
 /* Reduce allocated memory if we can, trying to get
  * the socket within its memory limits again.
  *
@@ -4231,20 +4287,7 @@ static int tcp_prune_queue(struct sock *sk)
 	/* Collapsing did not help, destructive actions follow.
 	 * This must not ever occur. */

-	/* First, purge the out_of_order queue. */
-	if (!skb_queue_empty(&tp->out_of_order_queue)) {
-		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
-		__skb_queue_purge(&tp->out_of_order_queue);
-
-		/* Reset SACK state. A conforming SACK implementation will
-		 * do the same at a timeout based retransmit. When a connection
-		 * is in a sad state like this, we care only about integrity
-		 * of the connection not performance.
-		 */
-		if (tcp_is_sack(tp))
-			tcp_sack_reset(&tp->rx_opt);
-		sk_mem_reclaim(sk);
-	}
+	tcp_prune_ofo_queue(sk);

 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;
@@ -4886,8 +4929,7 @@ step5:
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);

-	if (tcp_defer_accept_check(sk))
-		return -1;
+	tcp_defer_accept_check(sk);
 	return 0;

 csum_error:
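
Note on the dst_metric() conversions in the hunks above: the helper itself is defined in include/net/dst.h and is not part of this diff. The sketch below is a minimal userspace model of the accessor pattern, written only to show why replacing a raw dst->metrics[RTAX_* - 1] read with dst_metric(dst, RTAX_*) is behaviour-preserving. The struct layout, the RTAX_* values and the dst_metric() body here are illustrative assumptions, not the kernel's definitions.

/* Minimal userspace model of the dst_metric() accessor pattern.
 * Assumption: the real helper indexes the same metrics[] array with a
 * "metric - 1" offset; this is a sketch, not kernel code.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

enum { RTAX_RTO_MIN = 13, RTAX_MAX = 32 };	/* illustrative values only */

struct dst_entry {
	uint32_t metrics[RTAX_MAX];
};

/* Accessor that hides the "- 1" indexing used by open-coded metrics[] reads. */
static inline uint32_t dst_metric(const struct dst_entry *dst, int metric)
{
	return dst->metrics[metric - 1];
}

int main(void)
{
	struct dst_entry dst = { .metrics = { 0 } };

	dst.metrics[RTAX_RTO_MIN - 1] = 200;	/* old-style raw write */

	/* Both reads hit the same slot, so the conversion in the diff
	 * does not change behaviour. */
	assert(dst.metrics[RTAX_RTO_MIN - 1] == dst_metric(&dst, RTAX_RTO_MIN));
	printf("rto_min metric: %u\n", (unsigned)dst_metric(&dst, RTAX_RTO_MIN));
	return 0;
}

Built with a plain C compiler (for example: cc model.c && ./a.out), the assert passes, which is the whole point of the refactor: same value, but the "- 1" bookkeeping lives in one place.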