X-Git-Url: http://drtracing.org/?a=blobdiff_plain;f=net%2Fipv4%2Fudp.c;h=76d52d37d6ac8916a5cbd69fd913b14fb00b1cf6;hb=3b1e0a655f8eba44ab1ee2a1068d169ccfb853b9;hp=03c400ca14c5af07db1ab22686d95621b5cc8813;hpb=c4888f9ffafe7db107b7eafb3a68eaeeff3779c3;p=deliverable%2Flinux.git diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 03c400ca14c5..76d52d37d6ac 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -82,6 +82,7 @@ #include #include #include +#include #include #include #include @@ -110,39 +111,54 @@ */ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; +EXPORT_SYMBOL(udp_statistics); + +DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; +EXPORT_SYMBOL(udp_stats_in6); struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -static inline int __udp_lib_lport_inuse(__u16 num, +int sysctl_udp_mem[3] __read_mostly; +int sysctl_udp_rmem_min __read_mostly; +int sysctl_udp_wmem_min __read_mostly; + +EXPORT_SYMBOL(sysctl_udp_mem); +EXPORT_SYMBOL(sysctl_udp_rmem_min); +EXPORT_SYMBOL(sysctl_udp_wmem_min); + +atomic_t udp_memory_allocated; +EXPORT_SYMBOL(udp_memory_allocated); + +static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, const struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) - if (sk->sk_hash == num) + if (sock_net(sk) == net && sk->sk_hash == num) return 1; return 0; } /** - * __udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 + * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 * * @sk: socket struct in question * @snum: port number to look up - * @udptable: hash list table, must be of UDP_HTABLE_SIZE * @saddr_comp: AF-dependent comparison of bound local IP addresses */ -int __udp_lib_get_port(struct sock *sk, unsigned short snum, - struct hlist_head udptable[], +int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*saddr_comp)(const struct sock *sk1, const struct sock *sk2 ) ) { + struct hlist_head *udptable = sk->sk_prot->h.udp_hash; struct hlist_node *node; struct hlist_head *head; struct sock *sk2; int error = 1; + struct net *net = sock_net(sk); write_lock_bh(&udp_hash_lock); @@ -182,7 +198,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, /* 2nd pass: find hole in shortest hash chain */ rover = best; for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { - if (! __udp_lib_lport_inuse(rover, udptable)) + if (! __udp_lib_lport_inuse(net, rover, udptable)) goto gotit; rover += UDP_HTABLE_SIZE; if (rover > high) @@ -202,6 +218,7 @@ gotit: sk_for_each(sk2, node, head) if (sk2->sk_hash == snum && sk2 != sk && + sock_net(sk2) == net && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && @@ -214,7 +231,7 @@ gotit: if (sk_unhashed(sk)) { head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; sk_add_node(sk, head); - sock_prot_inc_use(sk->sk_prot); + sock_prot_inuse_add(sk->sk_prot, 1); } error = 0; fail: @@ -222,13 +239,7 @@ fail: return error; } -int udp_get_port(struct sock *sk, unsigned short snum, - int (*scmp)(const struct sock *, const struct sock *)) -{ - return __udp_lib_get_port(sk, snum, udp_hash, scmp); -} - -int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -237,17 +248,17 @@ int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) inet1->rcv_saddr == inet2->rcv_saddr )); } -static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) +int udp_v4_get_port(struct sock *sk, unsigned short snum) { - return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); + return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); } /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ -static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, - __be32 daddr, __be16 dport, - int dif, struct hlist_head udptable[]) +static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, + __be16 sport, __be32 daddr, __be16 dport, + int dif, struct hlist_head udptable[]) { struct sock *sk, *result = NULL; struct hlist_node *node; @@ -258,7 +269,8 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { + if (sock_net(sk) == net && sk->sk_hash == hnum && + !ipv6_only_sock(sk)) { int score = (sk->sk_family == PF_INET ? 1 : 0); if (inet->rcv_saddr) { if (inet->rcv_saddr != daddr) @@ -345,8 +357,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) int harderr; int err; - sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, - skb->dev->ifindex, udptable ); + sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest, + iph->saddr, uh->source, skb->dev->ifindex, udptable); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; /* No socket for error */ @@ -402,7 +414,7 @@ out: void udp_err(struct sk_buff *skb, u32 info) { - return __udp4_lib_err(skb, info, udp_hash); + __udp4_lib_err(skb, info, udp_hash); } /* @@ -471,6 +483,7 @@ static int udp_push_pending_frames(struct sock *sk) struct sk_buff *skb; struct udphdr *uh; int err = 0; + int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; /* Grab the skbuff where UDP header space exists. */ @@ -486,7 +499,7 @@ static int udp_push_pending_frames(struct sock *sk) uh->len = htons(up->len); uh->check = 0; - if (up->pcflag) /* UDP-Lite */ + if (is_udplite) /* UDP-Lite */ csum = udplite_csum_outgoing(sk, skb); else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ @@ -514,7 +527,7 @@ out: up->len = 0; up->pending = 0; if (!err) - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); + UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); return err; } @@ -531,7 +544,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, __be32 daddr, faddr, saddr; __be16 dport; u8 tos; - int err, is_udplite = up->pcflag; + int err, is_udplite = IS_UDPLITE(sk); int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); @@ -594,7 +607,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { - err = ip_cmsg_send(msg, &ipc); + err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) return err; if (ipc.opt) @@ -621,7 +634,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, connected = 0; } - if (MULTICAST(daddr)) { + if (ipv4_is_multicast(daddr)) { if (!ipc.oif) ipc.oif = inet->mc_index; if (!saddr) @@ -643,7 +656,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { .sport = inet->sport, .dport = dport } } }; security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&rt, &fl, sk, 1); + err = ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 1); if (err) { if (err == -ENETUNREACH) IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); @@ -825,6 +838,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; unsigned int ulen, copied; + int peeked; int err; int is_udplite = IS_UDPLITE(sk); @@ -838,7 +852,8 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return ip_recv_error(sk, msg, len); try_again: - skb = skb_recv_datagram(sk, flags, noblock, &err); + skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + &peeked, &err); if (!skb) goto out; @@ -873,6 +888,9 @@ try_again: if (err) goto out_free; + if (!peeked) + UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ @@ -891,14 +909,17 @@ try_again: err = ulen; out_free: + lock_sock(sk); skb_free_datagram(sk, skb); + release_sock(sk); out: return err; csum_copy_err: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); - - skb_kill_datagram(sk, skb, flags); + lock_sock(sk); + if (!skb_kill_datagram(sk, skb, flags)) + UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + release_sock(sk); if (noblock) return -EAGAIN; @@ -940,6 +961,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); int rc; + int is_udplite = IS_UDPLITE(sk); /* * Charge it to the socket, dropping if the queue is full. @@ -967,7 +989,8 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) ret = (*up->encap_rcv)(sk, skb); if (ret <= 0) { - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, + is_udplite); return -ret; } } @@ -978,7 +1001,7 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets */ - if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { /* * MIB statistics other than incrementing the error count are @@ -1019,15 +1042,14 @@ int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, up->pcflag); + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); goto drop; } - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, up->pcflag); return 0; drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, up->pcflag); + UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); kfree_skb(skb); return -1; } @@ -1062,7 +1084,15 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { - int ret = udp_queue_rcv_skb(sk, skb1); + int ret = 0; + + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb1); + else + sk_add_backlog(sk, skb1); + bh_unlock_sock(sk); + if (ret > 0) /* we should probably re-process instead * of dropping packets here. */ @@ -1151,11 +1181,17 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); - sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, - inet_iif(skb), udptable); + sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr, + uh->dest, inet_iif(skb), udptable); if (sk != NULL) { - int ret = udp_queue_rcv_skb(sk, skb); + int ret = 0; + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); sock_put(sk); /* a return value > 0 means to resubmit the input, but @@ -1236,6 +1272,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct udp_sock *up = udp_sk(sk); int val; int err = 0; + int is_udplite = IS_UDPLITE(sk); if (optlen packet length is handled by send module. */ case UDPLITE_SEND_CSCOV: - if (!up->pcflag) /* Disable the option on UDP sockets */ + if (!is_udplite) /* Disable the option on UDP sockets */ return -ENOPROTOOPT; if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ val = 8; @@ -1289,7 +1326,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, * sense, this should be set to at least 8 (as done below). If zero is * used, this again means full checksum coverage. */ case UDPLITE_RECV_CSCOV: - if (!up->pcflag) /* Disable the option on UDP sockets */ + if (!is_udplite) /* Disable the option on UDP sockets */ return -ENOPROTOOPT; if (val != 0 && val < 8) /* Avoid silly minimal values. */ val = 8; @@ -1449,7 +1486,12 @@ struct proto udp_prot = { .hash = udp_lib_hash, .unhash = udp_lib_unhash, .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem = &sysctl_udp_wmem_min, + .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), + .h.udp_hash = udp_hash, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, @@ -1464,10 +1506,13 @@ static struct sock *udp_get_first(struct seq_file *seq) { struct sock *sk; struct udp_iter_state *state = seq->private; + struct net *net = state->net; for (state->bucket = 0; state->bucket < UDP_HTABLE_SIZE; ++state->bucket) { struct hlist_node *node; sk_for_each(sk, node, state->hashtable + state->bucket) { + if (sock_net(sk) != net) + continue; if (sk->sk_family == state->family) goto found; } @@ -1480,12 +1525,13 @@ found: static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) { struct udp_iter_state *state = seq->private; + struct net *net = state->net; do { sk = sk_next(sk); try_again: ; - } while (sk && sk->sk_family != state->family); + } while (sk && (sock_net(sk) != net || sk->sk_family != state->family)); if (!sk && ++state->bucket < UDP_HTABLE_SIZE) { sk = sk_head(state->hashtable + state->bucket); @@ -1505,6 +1551,7 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) } static void *udp_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(udp_hash_lock) { read_lock(&udp_hash_lock); return *pos ? udp_get_idx(seq, *pos-1) : (void *)1; @@ -1524,6 +1571,7 @@ static void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void udp_seq_stop(struct seq_file *seq, void *v) + __releases(udp_hash_lock) { read_unlock(&udp_hash_lock); } @@ -1532,33 +1580,53 @@ static int udp_seq_open(struct inode *inode, struct file *file) { struct udp_seq_afinfo *afinfo = PDE(inode)->data; struct seq_file *seq; + struct net *net; int rc = -ENOMEM; struct udp_iter_state *s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; + + rc = -ENXIO; + net = get_proc_net(inode); + if (!net) + goto out_kfree; + s->family = afinfo->family; s->hashtable = afinfo->hashtable; s->seq_ops.start = udp_seq_start; s->seq_ops.next = udp_seq_next; s->seq_ops.show = afinfo->seq_show; s->seq_ops.stop = udp_seq_stop; + s->net = net; rc = seq_open(file, &s->seq_ops); if (rc) - goto out_kfree; + goto out_put_net; - seq = file->private_data; + seq = file->private_data; seq->private = s; out: return rc; +out_put_net: + put_net(net); out_kfree: kfree(s); goto out; } +static int udp_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct udp_iter_state *s = seq->private; + + put_net(s->net); + seq_release_private(inode, file); + return 0; +} + /* ------------------------------------------------------------------------ */ -int udp_proc_register(struct udp_seq_afinfo *afinfo) +int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo) { struct proc_dir_entry *p; int rc = 0; @@ -1569,9 +1637,9 @@ int udp_proc_register(struct udp_seq_afinfo *afinfo) afinfo->seq_fops->open = udp_seq_open; afinfo->seq_fops->read = seq_read; afinfo->seq_fops->llseek = seq_lseek; - afinfo->seq_fops->release = seq_release_private; + afinfo->seq_fops->release = udp_seq_release; - p = proc_net_fops_create(&init_net, afinfo->name, S_IRUGO, afinfo->seq_fops); + p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops); if (p) p->data = afinfo; else @@ -1579,11 +1647,11 @@ int udp_proc_register(struct udp_seq_afinfo *afinfo) return rc; } -void udp_proc_unregister(struct udp_seq_afinfo *afinfo) +void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo) { if (!afinfo) return; - proc_net_remove(&init_net, afinfo->name); + proc_net_remove(net, afinfo->name); memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } @@ -1633,27 +1701,61 @@ static struct udp_seq_afinfo udp4_seq_afinfo = { .seq_fops = &udp4_seq_fops, }; +static int udp4_proc_init_net(struct net *net) +{ + return udp_proc_register(net, &udp4_seq_afinfo); +} + +static void udp4_proc_exit_net(struct net *net) +{ + udp_proc_unregister(net, &udp4_seq_afinfo); +} + +static struct pernet_operations udp4_net_ops = { + .init = udp4_proc_init_net, + .exit = udp4_proc_exit_net, +}; + int __init udp4_proc_init(void) { - return udp_proc_register(&udp4_seq_afinfo); + return register_pernet_subsys(&udp4_net_ops); } void udp4_proc_exit(void) { - udp_proc_unregister(&udp4_seq_afinfo); + unregister_pernet_subsys(&udp4_net_ops); } #endif /* CONFIG_PROC_FS */ +void __init udp_init(void) +{ + unsigned long limit; + + /* Set the pressure threshold up by the same strategy of TCP. It is a + * fraction of global memory that is up to 1/2 at 256 MB, decreasing + * toward zero with the amount of memory, with a floor of 128 pages. + */ + limit = min(nr_all_pages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT); + limit = (limit * (nr_all_pages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11); + limit = max(limit, 128UL); + sysctl_udp_mem[0] = limit / 4 * 3; + sysctl_udp_mem[1] = limit; + sysctl_udp_mem[2] = sysctl_udp_mem[0] * 2; + + sysctl_udp_rmem_min = SK_MEM_QUANTUM; + sysctl_udp_wmem_min = SK_MEM_QUANTUM; +} + EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); -EXPORT_SYMBOL(udp_get_port); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); EXPORT_SYMBOL(udp_lib_getsockopt); EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); +EXPORT_SYMBOL(udp_lib_get_port); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(udp_proc_register);