net: include/net/sock.h cleanup
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 47e2f4972f7976e70188f62bc7ed58c203aa34e9..e8a80d0b5b3c8a0270290ad94a9d1c28dc82c14c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -363,6 +363,71 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
        return period;
 }
 
+/* Address-family independent initialization for a tcp_sock.
+ *
+ * NOTE: A lot of things are set to zero explicitly by sk_alloc(),
+ *       so they need not be done here.
+ */
+void tcp_init_sock(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       skb_queue_head_init(&tp->out_of_order_queue);
+       tcp_init_xmit_timers(sk);
+       tcp_prequeue_init(tp);
+
+       icsk->icsk_rto = TCP_TIMEOUT_INIT;
+       tp->mdev = TCP_TIMEOUT_INIT;
+
+       /* So many TCP implementations out there (incorrectly) count the
+        * initial SYN frame in their delayed-ACK and congestion control
+        * algorithms that we must have the following bandaid to talk
+        * efficiently to them.  -DaveM
+        */
+       tp->snd_cwnd = TCP_INIT_CWND;
+
+       /* See draft-stevens-tcpca-spec-01 for discussion of the
+        * initialization of these values.
+        */
+       tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+       tp->snd_cwnd_clamp = ~0;
+       tp->mss_cache = TCP_MSS_DEFAULT;
+
+       tp->reordering = sysctl_tcp_reordering;
+       tcp_enable_early_retrans(tp);
+       icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+
+       sk->sk_state = TCP_CLOSE;
+
+       sk->sk_write_space = sk_stream_write_space;
+       sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
+
+       icsk->icsk_sync_mss = tcp_sync_mss;
+
+       /* TCP Cookie Transactions */
+       if (sysctl_tcp_cookie_size > 0) {
+               /* Default, cookies without s_data_payload. */
+               tp->cookie_values =
+                       kzalloc(sizeof(*tp->cookie_values),
+                               sk->sk_allocation);
+               if (tp->cookie_values != NULL)
+                       kref_init(&tp->cookie_values->kref);
+       }
+       /* Presumed zeroed, in order of appearance:
+        *      cookie_in_always, cookie_out_never,
+        *      s_data_constant, s_data_in, s_data_out
+        */
+       sk->sk_sndbuf = sysctl_tcp_wmem[1];
+       sk->sk_rcvbuf = sysctl_tcp_rmem[1];
+
+       local_bh_disable();
+       sock_update_memcg(sk);
+       sk_sockets_allocated_inc(sk);
+       local_bh_enable();
+}
+EXPORT_SYMBOL(tcp_init_sock);
+
 /*
  *     Wait for a TCP event.
  *
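
The new tcp_init_sock() gathers all address-family independent setup in one place. Presumably the per-family init routines elsewhere in this series (not shown on this page) shrink to a call into it plus their AF-specific bits; a hedged sketch of what tcp_v4_init_sock() would look like:

	static int tcp_v4_init_sock(struct sock *sk)
	{
		struct inet_connection_sock *icsk = inet_csk(sk);

		/* shared, AF-independent state */
		tcp_init_sock(sk);

		/* IPv4-specific callbacks */
		icsk->icsk_af_ops = &ipv4_specific;

	#ifdef CONFIG_TCP_MD5SIG
		tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
	#endif
		return 0;
	}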
@@ -784,9 +849,10 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
        while (psize > 0) {
                struct sk_buff *skb = tcp_write_queue_tail(sk);
                struct page *page = pages[poffset / PAGE_SIZE];
-               int copy, i, can_coalesce;
+               int copy, i;
                int offset = poffset % PAGE_SIZE;
                int size = min_t(size_t, psize, PAGE_SIZE - offset);
+               bool can_coalesce;
 
                if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0) {
 new_segment:
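
can_coalesce holds a yes/no answer, hence the int to bool conversion. The value presumably still comes from skb_can_coalesce() in skbuff.h, which tests whether the page can simply extend the skb's last fragment, along these lines:

	static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
					    const struct page *page, int off)
	{
		if (i) {
			const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];

			/* same page, starting exactly where the fragment ends */
			return page == skb_frag_page(frag) &&
			       off == frag->page_offset + skb_frag_size(frag);
		}
		return false;
	}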
@@ -912,39 +978,6 @@ static inline int select_size(const struct sock *sk, bool sg)
        return tmp;
 }
 
-static int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
-{
-       struct sk_buff *skb;
-       struct tcp_skb_cb *cb;
-       struct tcphdr *th;
-
-       skb = alloc_skb(size + sizeof(*th), sk->sk_allocation);
-       if (!skb)
-               goto err;
-
-       th = (struct tcphdr *)skb_put(skb, sizeof(*th));
-       skb_reset_transport_header(skb);
-       memset(th, 0, sizeof(*th));
-
-       if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
-               goto err_free;
-
-       cb = TCP_SKB_CB(skb);
-
-       TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
-       TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + size;
-       TCP_SKB_CB(skb)->ack_seq = tcp_sk(sk)->snd_una - 1;
-
-       tcp_queue_rcv(sk, skb, sizeof(*th));
-
-       return size;
-
-err_free:
-       kfree_skb(skb);
-err:
-       return -ENOMEM;
-}
-
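
Note that tcp_send_rcvq() is deleted here with no replacement in this file; presumably it is relocated to another file in the same series (this page shows net/ipv4/tcp.c only).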
 int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                size_t size)
 {
@@ -1440,11 +1473,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                                break;
                }
                if (tcp_hdr(skb)->fin) {
-                       sk_eat_skb(sk, skb, 0);
+                       sk_eat_skb(sk, skb, false);
                        ++seq;
                        break;
                }
-               sk_eat_skb(sk, skb, 0);
+               sk_eat_skb(sk, skb, false);
                if (!desc->count)
                        break;
                tp->copied_seq = seq;
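
The 0 -> false conversions track the sk_eat_skb() prototype change in include/net/sock.h named in the commit title: copied_early is now a bool. The non-DMA flavor of the helper is presumably along these lines:

	static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb,
				      bool copied_early)
	{
		/* detach from the receive queue and free; copied_early only
		 * matters when CONFIG_NET_DMA keeps early-copied skbs around
		 */
		__skb_unlink(skb, &sk->sk_receive_queue);
		__kfree_skb(skb);
	}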
@@ -1480,7 +1513,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        int target;             /* Read at least this many bytes */
        long timeo;
        struct task_struct *user_recv = NULL;
-       int copied_early = 0;
+       bool copied_early = false;
        struct sk_buff *skb;
        u32 urg_hole = 0;
 
@@ -1712,9 +1745,9 @@ do_prequeue:
                }
                if ((flags & MSG_PEEK) &&
                    (peek_seq - copied - urg_hole != tp->copied_seq)) {
-                       if (net_ratelimit())
-                               printk(KERN_DEBUG "TCP(%s:%d): Application bug, race in MSG_PEEK.\n",
-                                      current->comm, task_pid_nr(current));
+                       net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n",
+                                           current->comm,
+                                           task_pid_nr(current));
                        peek_seq = tp->copied_seq;
                }
                continue;
@@ -1768,7 +1801,7 @@ do_prequeue:
                                dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
 
                                if ((offset + used) == skb->len)
-                                       copied_early = 1;
+                                       copied_early = true;
 
                        } else
 #endif
@@ -1802,7 +1835,7 @@ skip_copy:
                        goto found_fin_ok;
                if (!(flags & MSG_PEEK)) {
                        sk_eat_skb(sk, skb, copied_early);
-                       copied_early = 0;
+                       copied_early = false;
                }
                continue;
 
@@ -1811,7 +1844,7 @@ skip_copy:
                ++*seq;
                if (!(flags & MSG_PEEK)) {
                        sk_eat_skb(sk, skb, copied_early);
-                       copied_early = 0;
+                       copied_early = false;
                }
                break;
        } while (len > 0);
@@ -1969,10 +2002,10 @@ bool tcp_check_oom(struct sock *sk, int shift)
        too_many_orphans = tcp_too_many_orphans(sk, shift);
        out_of_socket_memory = tcp_out_of_memory(sk);
 
-       if (too_many_orphans && net_ratelimit())
-               pr_info("too many orphaned sockets\n");
-       if (out_of_socket_memory && net_ratelimit())
-               pr_info("out of memory -- consider tuning tcp_mem\n");
+       if (too_many_orphans)
+               net_info_ratelimited("too many orphaned sockets\n");
+       if (out_of_socket_memory)
+               net_info_ratelimited("out of memory -- consider tuning tcp_mem\n");
        return too_many_orphans || out_of_socket_memory;
 }
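
net_info_ratelimited() and net_dbg_ratelimited() fold the explicit net_ratelimit() test into the print itself. They are presumably defined along these lines in include/linux/net.h:

	#define net_ratelimited_function(function, ...)			\
	do {								\
		if (net_ratelimit())					\
			function(__VA_ARGS__);				\
	} while (0)

	#define net_info_ratelimited(fmt, ...)				\
		net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__)
	#define net_dbg_ratelimited(fmt, ...)				\
		net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)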
 
@@ -2218,6 +2251,48 @@ static inline int tcp_can_repair_sock(struct sock *sk)
                ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_ESTABLISHED));
 }
 
+static int tcp_repair_options_est(struct tcp_sock *tp,
+               struct tcp_repair_opt __user *optbuf, unsigned int len)
+{
+       struct tcp_repair_opt opt;
+
+       while (len >= sizeof(opt)) {
+               if (copy_from_user(&opt, optbuf, sizeof(opt)))
+                       return -EFAULT;
+
+               optbuf++;
+               len -= sizeof(opt);
+
+               switch (opt.opt_code) {
+               case TCPOPT_MSS:
+                       tp->rx_opt.mss_clamp = opt.opt_val;
+                       break;
+               case TCPOPT_WINDOW:
+                       if (opt.opt_val > 14)
+                               return -EFBIG;
+
+                       tp->rx_opt.snd_wscale = opt.opt_val;
+                       break;
+               case TCPOPT_SACK_PERM:
+                       if (opt.opt_val != 0)
+                               return -EINVAL;
+
+                       tp->rx_opt.sack_ok |= TCP_SACK_SEEN;
+                       if (sysctl_tcp_fack)
+                               tcp_enable_fack(tp);
+                       break;
+               case TCPOPT_TIMESTAMP:
+                       if (opt.opt_val != 0)
+                               return -EINVAL;
+
+                       tp->rx_opt.tstamp_ok = 1;
+                       break;
+               }
+       }
+
+       return 0;
+}
+
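
tcp_repair_options_est() walks an array of fixed-size records; a trailing partial record is silently ignored by the len >= sizeof(opt) guard, and unrecognized opt_code values are skipped without error. The record layout, from the kernel's tcp.h:

	struct tcp_repair_opt {
		__u32	opt_code;
		__u32	opt_val;
	};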
 /*
  *     Socket option code for TCP.
  */
@@ -2388,6 +2463,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                        err = -EINVAL;
-               else
+               else {
                        tp->thin_dupack = val;
+                       if (tp->thin_dupack)
+                               tcp_disable_early_retrans(tp);
+               }
                break;
 
        case TCP_REPAIR:
@@ -2426,6 +2503,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                        err = -EINVAL;
                break;
 
+       case TCP_REPAIR_OPTIONS:
+               if (!tp->repair)
+                       err = -EINVAL;
+               else if (sk->sk_state == TCP_ESTABLISHED)
+                       err = tcp_repair_options_est(tp,
+                                       (struct tcp_repair_opt __user *)optval,
+                                       optlen);
+               else
+                       err = -EPERM;
+               break;
+
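
A hypothetical restore-side sequence for the new option: with the socket in repair mode and forced into TCP_ESTABLISHED, the options saved at checkpoint time are replayed in one call:

	struct tcp_repair_opt opts[] = {
		{ .opt_code = TCPOPT_MSS,	.opt_val = 1460 },
		{ .opt_code = TCPOPT_WINDOW,	.opt_val = 7 },	/* snd_wscale, must be <= 14 */
		{ .opt_code = TCPOPT_SACK_PERM,	.opt_val = 0 },
	};

	if (setsockopt(fd, SOL_TCP, TCP_REPAIR_OPTIONS,
		       opts, sizeof(opts)) < 0)
		perror("TCP_REPAIR_OPTIONS");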
        case TCP_CORK:
                /* When set indicates to always queue non-full frames.
                 * Later the user clears this option and we transmit
@@ -2659,6 +2747,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
                val = tp->mss_cache;
                if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
                        val = tp->rx_opt.user_mss;
+               if (tp->repair)
+                       val = tp->rx_opt.mss_clamp;
                break;
        case TCP_NODELAY:
                val = !!(tp->nonagle&TCP_NAGLE_OFF);
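
The hunk above is the TCP_MAXSEG branch, so a restore tool can read the clamp back while the socket is still in repair mode; a hedged sketch:

	int mss;
	socklen_t len = sizeof(mss);

	/* in repair mode this now yields rx_opt.mss_clamp, not mss_cache */
	if (getsockopt(fd, SOL_TCP, TCP_MAXSEG, &mss, &len) == 0)
		printf("negotiated mss clamp: %d\n", mss);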
@@ -3392,7 +3482,7 @@ void __init tcp_init(void)
 {
        struct sk_buff *skb = NULL;
        unsigned long limit;
-       int max_share, cnt;
+       int max_rshare, max_wshare, cnt;
        unsigned int i;
        unsigned long jiffy = jiffies;
 
@@ -3452,15 +3542,16 @@ void __init tcp_init(void)
        tcp_init_mem(&init_net);
        /* Set per-socket limits to no more than 1/128 the pressure threshold */
        limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7);
-       max_share = min(4UL*1024*1024, limit);
+       max_wshare = min(4UL*1024*1024, limit);
+       max_rshare = min(6UL*1024*1024, limit);
 
        sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_wmem[1] = 16*1024;
-       sysctl_tcp_wmem[2] = max(64*1024, max_share);
+       sysctl_tcp_wmem[2] = max(64*1024, max_wshare);
 
        sysctl_tcp_rmem[0] = SK_MEM_QUANTUM;
        sysctl_tcp_rmem[1] = 87380;
-       sysctl_tcp_rmem[2] = max(87380, max_share);
+       sysctl_tcp_rmem[2] = max(87380, max_rshare);
 
        pr_info("Hash tables configured (established %u bind %u)\n",
                tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
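
For scale: limit works out to total buffer-page memory divided by 128 (the << (PAGE_SHIFT - 7)), so with roughly 512 MB or more it is the 4 MB constant that caps max_wshare, and from roughly 768 MB up the 6 MB constant caps max_rshare; below those points the 1/128 rule wins. The change gives the receive side its own 6 MB ceiling instead of sharing the send side's 4 MB.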