tcp: add tcp_in_slow_start helper

[deliverable/linux.git] / include / net / tcp.h
diff --git a/include/net/tcp.h b/include/net/tcp.h

index 6d204f3f9df8cafb82d856db08769a7d24dfd79e..dba22fc1b065a70b9604872b1460c7b2ea24db1d 100644 (file)
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -286,6 +286,14 @@ extern atomic_long_t tcp_memory_allocated;
  extern struct percpu_counter tcp_sockets_allocated;
  extern int tcp_memory_pressure;
  
+/* optimized version of sk_under_memory_pressure() for TCP sockets */
+static inline bool tcp_under_memory_pressure(const struct sock *sk)
+{
+       if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+               return !!sk->sk_cgrp->memory_pressure;
+
+       return tcp_memory_pressure;
+}
  /*
   * The next routines deal with comparing 32 bit unsigned ints
   * and worry about wraparound (automatic with unsigned arithmetic).
@@ -311,6 +319,8 @@ static inline bool tcp_out_of_memory(struct sock *sk)
         return false;
  }
  
+void sk_forced_mem_schedule(struct sock *sk, int size);
+
  static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
  {
         struct percpu_counter *ocp = sk->sk_prot->orphan_count;
@@ -326,18 +336,6 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
  
  bool tcp_check_oom(struct sock *sk, int shift);
  
-/* syncookies: remember time of last synqueue overflow */
-static inline void tcp_synq_overflow(struct sock *sk)
-{
-       tcp_sk(sk)->rx_opt.ts_recent_stamp = jiffies;
-}
-
-/* syncookies: no recent synqueue overflow on this listening socket? */
-static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
-{
-       unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
-       return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK);
-}
  
  extern struct proto tcp_prot;
  
@@ -471,6 +469,9 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size);
  void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb);
  
  /* From syncookies.c */
+struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+                                struct request_sock *req,
+                                struct dst_entry *dst);
  int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
                       u32 cookie);
  struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
@@ -483,13 +484,35 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb);
   * i.e. a sent cookie is valid only at most for 2*60 seconds (or less if
   * the counter advances immediately after a cookie is generated).
   */
-#define MAX_SYNCOOKIE_AGE 2
+#define MAX_SYNCOOKIE_AGE      2
+#define TCP_SYNCOOKIE_PERIOD   (60 * HZ)
+#define TCP_SYNCOOKIE_VALID    (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD)
+
+/* syncookies: remember time of last synqueue overflow
+ * But do not dirty this field too often (once per second is enough)
+ */
+static inline void tcp_synq_overflow(struct sock *sk)
+{
+       unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+       unsigned long now = jiffies;
+
+       if (time_after(now, last_overflow + HZ))
+               tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
+}
+
+/* syncookies: no recent synqueue overflow on this listening socket? */
+static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
+{
+       unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
+
+       return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID);
+}
  
  static inline u32 tcp_cookie_time(void)
  {
         u64 val = get_jiffies_64();
  
-       do_div(val, 60 * HZ);
+       do_div(val, TCP_SYNCOOKIE_PERIOD);
         return val;
  }
  
@@ -527,7 +550,7 @@ int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
  
  void tcp_send_probe0(struct sock *);
  void tcp_send_partial(struct sock *);
-int tcp_write_wakeup(struct sock *);
+int tcp_write_wakeup(struct sock *, int mib);
  void tcp_send_fin(struct sock *sk);
  void tcp_send_active_reset(struct sock *sk, gfp_t priority);
  int tcp_send_synack(struct sock *);
@@ -692,6 +715,8 @@ static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
  #define TCPHDR_ECE 0x40
  #define TCPHDR_CWR 0x80
  
+#define TCPHDR_SYN_ECN (TCPHDR_SYN | TCPHDR_ECE | TCPHDR_CWR)
+
  /* This is what the send packet queuing engine uses to pass
   * TCP per-packet control information to the transmission code.
   * We also store the host-order sequence numbers in here too.
@@ -705,11 +730,14 @@ struct tcp_skb_cb {
                 /* Note : tcp_tw_isn is used in input path only
                  *        (isn chosen by tcp_timewait_state_process())
                  *
-                *        tcp_gso_segs is used in write queue only,
-                *        cf tcp_skb_pcount()
+                *        tcp_gso_segs/size are used in write queue only,
+                *        cf tcp_skb_pcount()/tcp_skb_mss()
                  */
                 __u32           tcp_tw_isn;
-               __u32           tcp_gso_segs;
+               struct {
+                       u16     tcp_gso_segs;
+                       u16     tcp_gso_size;
+               };
         };
         __u8            tcp_flags;      /* TCP header flags. (tcp[13])  */
  
@@ -765,10 +793,10 @@ static inline void tcp_skb_pcount_add(struct sk_buff *skb, int segs)
         TCP_SKB_CB(skb)->tcp_gso_segs += segs;
  }
  
-/* This is valid iff tcp_skb_pcount() > 1. */
+/* This is valid iff skb is in write queue and tcp_skb_pcount() > 1. */
  static inline int tcp_skb_mss(const struct sk_buff *skb)
  {
-       return skb_shinfo(skb)->gso_size;
+       return TCP_SKB_CB(skb)->tcp_gso_size;
  }
  
  /* Events passed to congestion control interface */
@@ -961,6 +989,11 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
  
  #define TCP_INFINITE_SSTHRESH  0x7fffffff
  
+static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+       return tp->snd_cwnd <= tp->snd_ssthresh;
+}
+
  static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
  {
         return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
@@ -1037,20 +1070,37 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
         const struct tcp_sock *tp = tcp_sk(sk);
  
         /* If in slow start, ensure cwnd grows to twice what was ACKed. */
-       if (tp->snd_cwnd <= tp->snd_ssthresh)
+       if (tcp_in_slow_start(tp))
                 return tp->snd_cwnd < 2 * tp->max_packets_out;
  
         return tp->is_cwnd_limited;
  }
  
-static inline void tcp_check_probe_timer(struct sock *sk)
+/* Something is really bad, we could not queue an additional packet,
+ * because qdisc is full or receiver sent a 0 window.
+ * We do not want to add fuel to the fire, or abort too early,
+ * so make sure the timer we arm now is at least 200ms in the future,
+ * regardless of current icsk_rto value (as it could be ~2ms)
+ */
+static inline unsigned long tcp_probe0_base(const struct sock *sk)
  {
-       const struct tcp_sock *tp = tcp_sk(sk);
-       const struct inet_connection_sock *icsk = inet_csk(sk);
+       return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN);
+}
+
+/* Variant of inet_csk_rto_backoff() used for zero window probes */
+static inline unsigned long tcp_probe0_when(const struct sock *sk,
+                                           unsigned long max_when)
+{
+       u64 when = (u64)tcp_probe0_base(sk) << inet_csk(sk)->icsk_backoff;
+
+       return (unsigned long)min_t(u64, when, max_when);
+}
  
-       if (!tp->packets_out && !icsk->icsk_pending)
+static inline void tcp_check_probe_timer(struct sock *sk)
+{
+       if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-                                         icsk->icsk_rto, TCP_RTO_MAX);
+                                         tcp_probe0_base(sk), TCP_RTO_MAX);
  }
  
  static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)