/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *		Johann Baudy	:	Added TX RING.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnels); others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header != NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header == NULL
   mac_header -> UNKNOWN position. It is very likely that it points to the
		 ll header. PPP does this, which is wrong, because it
		 introduces asymmetry between rx and tx paths.
   data       -> data

Outgoing, dev->hard_header == NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Resume
  If dev->hard_header == NULL we are unlikely to restore a sensible ll
  header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */

/* Private packet socket structures. */

struct packet_mclist {
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};

/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
			   int closing, int tx_ring);

struct packet_ring_buffer {
	char			**pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;

	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;

	atomic_t		pending;
};

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
#ifdef CONFIG_PACKET_MMAP
	struct packet_ring_buffer rx_ring;
	struct packet_ring_buffer tx_ring;
	int			copy_thresh;
#endif
	spinlock_t		bind_lock;
	struct mutex		pg_vec_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1,
				origdev:1,
				has_vnet_hdr:1;
	int			ifindex;	/* bound device */
	__be16			num;
	struct packet_mclist	*mclist;
#ifdef CONFIG_PACKET_MMAP
	atomic_t		mapped;
	enum tpacket_versions	tp_version;
	unsigned int		tp_hdrlen;
	unsigned int		tp_reserve;
	unsigned int		tp_loss:1;
#endif
	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
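
/* PACKET_SKB_CB() overlays scratch state on skb->cb; the BUILD_BUG_ON()
 * in packet_rcv() verifies that this layout, with sll_addr grown to
 * MAX_ADDR_LEN, still fits inside the control buffer. */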

#ifdef CONFIG_PACKET_MMAP

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(virt_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(virt_to_page(&h.h2->tp_status));
		break;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
	}

	smp_wmb();
}
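
/* The smp_wmb() above pairs with the smp_rmb() in __packet_get_status():
 * frame contents must be visible before the tp_status handoff word that
 * userspace polls, and flush_dcache_page() keeps the mmap()ed status
 * coherent on architectures with aliasing data caches. */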

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(virt_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(virt_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	default:
		pr_err("TPACKET version not supported\n");
		BUG();
		return 0;
	}
}

static void *packet_lookup_frame(struct packet_sock *po,
				 struct packet_ring_buffer *rb,
				 unsigned int position,
				 int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}
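
/* Ring geometry: the ring is a vector of higher-order page blocks; frame
 * N lives in block N / frames_per_block at byte offset
 * (N % frames_per_block) * frame_size, so lookup is O(1) and a frame
 * never straddles a block boundary. */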

static inline void *packet_current_frame(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static inline void *packet_previous_frame(struct packet_sock *po,
					  struct packet_ring_buffer *rb,
					  int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static inline void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head + 1 : 0;
}

#endif

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}


static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 * When we registered the protocol we saved the socket in the data
	 * field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 * Yank back the headers [hope the device set this
	 * right or kerboom...]
	 *
	 * Incoming packets have ll header pulled,
	 * push it back.
	 *
	 * For outgoing ones skb->data == skb_mac_header(skb)
	 * so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 * The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 * Charge the memory to the socket. This is done specifically
	 * to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}


/*
 * Output a raw packet to a device layer. This bypasses all the other
 * protocol layers and you must therefore supply it with a complete frame
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 * Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 * Find the device first to size check it
	 */

	saddr->spkt_device[13] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}


	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}
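
/* Note the retry above: the skb is allocated with the RCU read lock
 * dropped (sock_wmalloc() may sleep), then the device is looked up
 * again, so a rename or unregister between the two passes is handled. */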

static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns, filter->len);
	rcu_read_unlock_bh();

	return res;
}

/*
   This function makes lazy skb cloning in hope that most of packets
   are discarded by BPF.

   Note tricky part: we DO mangle shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return skb to original state on exit,
   we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides the details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}

#ifdef CONFIG_PACKET_MMAP
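/* RX ring frame layout written by tpacket_rcv() below: a tpacket{,2}_hdr,
 * an aligned sockaddr_ll, then the packet itself with its link-level
 * header at tp_mac and network header at tp_net; tp_reserve
 * (PACKET_RESERVE) shifts both offsets to leave user headroom. */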
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			 po->tp_reserve;
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->rx_ring.frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->rx_ring.frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
	if (!h.raw)
		goto ring_is_full;
	packet_increment_head(&po->rx_ring);
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
		hdrlen = sizeof(*h.h2);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	__packet_set_status(po, h.raw, status);
	smp_mb();
	{
		struct page *p_start, *p_end;
		u8 *h_end = h.raw + macoff + snaplen - 1;

		p_start = virt_to_page(h.raw);
		p_end = virt_to_page(h_end);
		while (p_start <= p_end) {
			flush_dcache_page(p_start);
			p_start++;
		}
	}

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}

static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}

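/* Build a TX skb directly over the ring frame: apart from the hard
 * header, payload pages are attached as fragments (zero copy), and
 * destructor_arg records the frame so tpacket_destruct_skb() can flip it
 * back to TP_STATUS_AVAILABLE when the driver frees the skb. */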
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
			    void *frame, struct net_device *dev, int size_max,
			    __be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				      NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				     dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	page = virt_to_page(data);
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb,
				   nr_frags,
				   page++, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}

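/* The TX ring handshake: userspace marks frames TP_STATUS_SEND_REQUEST;
 * they are consumed in order, held as TP_STATUS_SENDING while in flight,
 * and released to TP_STATUS_AVAILABLE from the skb destructor. With
 * PACKET_LOSS set, a malformed frame is skipped instead of aborting. */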
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		ifindex = po->ifindex;
		proto = po->num;
		addr = NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						   sll_addr)))
			goto out;
		ifindex = saddr->sll_ifindex;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		   - (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
					  TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		skb = sock_alloc_send_skb(&po->sk,
					  LL_ALLOCATED_SPACE(dev)
					  + sizeof(struct sockaddr_ll),
					  0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
					  addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						    TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		 );

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
#endif

static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					       size_t reserve, size_t len,
					       size_t linear, int noblock,
					       int *err)
{
	struct sk_buff *skb;

	/* Under a page? Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}

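/* packet_snd() is the non-mmap transmit path. With PACKET_VNET_HDR set,
 * a struct virtio_net_hdr precedes the frame in the iovec and carries
 * checksum-offload and GSO metadata, letting a guest (e.g. through
 * vhost-net) pass large, partially checksummed packets to the device. */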
static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 * Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex = po->ifindex;
		proto = po->num;
		addr = NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex = saddr->sll_ifindex;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		     vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
					   vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu + reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	/*
	 * Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
#ifdef CONFIG_PACKET_MMAP
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
#endif
		return packet_snd(sock, msg, len);
}

/*
 * Close a PACKET socket. This is fairly simple. We immediately go
 * to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
#ifdef CONFIG_PACKET_MMAP
	struct tpacket_req req;
#endif

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	write_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	write_unlock_bh(&net->packet.sklist_lock);

	/*
	 * Unhook packet receive handler.
	 */

	if (po->running) {
		/*
		 * Remove the protocol hook
		 */
		dev_remove_pack(&po->prot_hook);
		po->running = 0;
		po->num = 0;
		__sock_put(sk);
	}

	packet_flush_mclist(sk);

#ifdef CONFIG_PACKET_MMAP
	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);
#endif

	/*
	 * Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}

/*
 * Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 * Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 * Bind a packet socket to a device
 */

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 * Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 * Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}

static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 * Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	write_lock_bh(&net->packet.sklist_lock);
	sk_add_node(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	write_unlock_bh(&net->packet.sklist_lock);
	return 0;
out:
	return err;
}

/*
 * Pull a packet from our receive queue and hand it to the user.
 * If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 * Call the generic datagram receiver. This handles all sorts
	 * of horrible races and re-entrancy so we can forget about it
	 * in the protocol layers.
	 *
	 * Now it will return ENETDOWN, if the device has just gone down,
	 * but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 * An error occurred so return it. Because skb_recv_datagram()
	 * handles the blocking we don't see and worry about blocking
	 * retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if ((len -= vnet_hdr_len) < 0)
			goto out_free;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb->csum_start -
					      skb_headroom(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} /* else everything is zero */

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 * If the address length field is there to be filled in, we fill
	 * it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 * You lose any data beyond the buffer you gave. If it worries a
	 * user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		aux.tp_vlan_tci = vlan_tx_tag_get(skb);

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 * Free or return the buffer as appropriate. Again this
	 * hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags & MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strlcpy(uaddr->sa_data, dev->name, 15);
	else
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (what > 0)
			return dev_mc_add(dev, i->addr, i->alen, 0);
		else
			return dev_mc_delete(dev, i->addr, i->alen, 0);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (what > 0)
			return dev_unicast_add(dev, i->addr);
		else
			return dev_unicast_delete(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}
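
/* 'what' is +1 to take a reference and -1 to drop one; for PROMISC and
 * ALLMULTI it is handed straight to the device as the refcount delta. */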

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}
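
/* mclist entries are reference counted per (ifindex, type, address)
 * tuple: duplicate PACKET_ADD_MEMBERSHIP requests only bump the count,
 * and the device filter is touched on first add and last drop only. */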

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
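	/* tp_version, tp_reserve and tp_loss all shape the ring frame
	 * layout, so they may only change while no RX/TX ring is
	 * attached; hence the -EBUSY checks against pg_vec here and
	 * below. */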
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
#endif
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
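	/* A minimal userspace sketch (hypothetical fd, error handling
	 * omitted) for enabling the virtio header on a raw socket:
	 *
	 *	int one = 1;
	 *	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
	 *	setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one,
	 *		   sizeof(one));
	 *
	 * after which every sendmsg()/recvmsg() payload begins with a
	 * struct virtio_net_hdr. */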
	default:
		return -ENOPROTOOPT;
	}
}

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	case PACKET_VNET_HDR:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->has_vnet_hdr;

		data = &val;
		break;
#ifdef CONFIG_PACKET_MMAP
	case PACKET_VERSION:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->tp_version;
		data = &val;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		default:
			return -EINVAL;
		}
		data = &val;
		break;
8913336a
PM
2068 case PACKET_RESERVE:
2069 if (len > sizeof(unsigned int))
2070 len = sizeof(unsigned int);
2071 val = po->tp_reserve;
2072 data = &val;
2073 break;
69e3c75f
JB
2074 case PACKET_LOSS:
2075 if (len > sizeof(unsigned int))
2076 len = sizeof(unsigned int);
2077 val = po->tp_loss;
2078 data = &val;
2079 break;
bbd6ef87 2080#endif
1da177e4
LT
2081 default:
2082 return -ENOPROTOOPT;
2083 }
2084
8ae55f04
KK
2085 if (put_user(len, optlen))
2086 return -EFAULT;
8dc41944
HX
2087 if (copy_to_user(optval, data, len))
2088 return -EFAULT;
8ae55f04 2089 return 0;
1da177e4
LT
2090}
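
Two details of the read path above are easy to miss: PACKET_STATISTICS is a destructive read (the counters are zeroed under the receive-queue lock, and tp_packets is returned with tp_drops folded in), and PACKET_HDRLEN is the one option that consumes its optval first, mapping a TPACKET version to its header size. A hedged sketch of the latter as a userspace helper:

#include <sys/socket.h>
#include <linux/if_packet.h>

/* Sketch: ask the kernel for the frame-header length of a ring version. */
static int tpacket_hdrlen(int fd, int version)
{
	int val = version;
	socklen_t len = sizeof(val);

	if (getsockopt(fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0)
		return -1;
	return val;	/* sizeof(struct tpacket_hdr) or tpacket2_hdr */
}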
2091
2092
2093static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
2094{
2095 struct sock *sk;
2096 struct hlist_node *node;
ad930650 2097 struct net_device *dev = data;
c346dca1 2098 struct net *net = dev_net(dev);
1da177e4 2099
2aaef4e4
DL
2100 read_lock(&net->packet.sklist_lock);
2101 sk_for_each(sk, node, &net->packet.sklist) {
1da177e4
LT
2102 struct packet_sock *po = pkt_sk(sk);
2103
2104 switch (msg) {
2105 case NETDEV_UNREGISTER:
1da177e4
LT
2106 if (po->mclist)
2107 packet_dev_mclist(dev, po->mclist, -1);
a2efcfa0
DM
2108 /* fallthrough */
2109
1da177e4
LT
2110 case NETDEV_DOWN:
2111 if (dev->ifindex == po->ifindex) {
2112 spin_lock(&po->bind_lock);
2113 if (po->running) {
2114 __dev_remove_pack(&po->prot_hook);
2115 __sock_put(sk);
2116 po->running = 0;
2117 sk->sk_err = ENETDOWN;
2118 if (!sock_flag(sk, SOCK_DEAD))
2119 sk->sk_error_report(sk);
2120 }
2121 if (msg == NETDEV_UNREGISTER) {
2122 po->ifindex = -1;
2123 po->prot_hook.dev = NULL;
2124 }
2125 spin_unlock(&po->bind_lock);
2126 }
2127 break;
2128 case NETDEV_UP:
2129 spin_lock(&po->bind_lock);
2130 if (dev->ifindex == po->ifindex && po->num &&
2131 !po->running) {
2132 dev_add_pack(&po->prot_hook);
2133 sock_hold(sk);
2134 po->running = 1;
2135 }
2136 spin_unlock(&po->bind_lock);
2137 break;
2138 }
2139 }
2aaef4e4 2140 read_unlock(&net->packet.sklist_lock);
1da177e4
LT
2141 return NOTIFY_DONE;
2142}
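
The notifier gives bound sockets defined failure semantics: NETDEV_DOWN removes the protocol hook and sets sk_err to ENETDOWN, which userspace sees on its next operation; NETDEV_UNREGISTER additionally dissolves the binding (ifindex = -1), while NETDEV_UP quietly re-arms the hook for a socket that stayed bound by protocol number. A sketch of what a reader might do with that error (the recovery policy is an assumption, not from this file):

#include <errno.h>
#include <sys/socket.h>

/* Sketch: after the notifier handles NETDEV_DOWN for the bound device,
 * recv() reports the pending ENETDOWN. */
static ssize_t recv_frame(int fd, void *buf, size_t len)
{
	ssize_t n = recv(fd, buf, len, 0);

	if (n < 0 && errno == ENETDOWN) {
		/* interface went down: rebind, wait, or tear down */
	}
	return n;
}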
2143
2144
2145static int packet_ioctl(struct socket *sock, unsigned int cmd,
2146 unsigned long arg)
2147{
2148 struct sock *sk = sock->sk;
2149
69e3c75f 2150 switch (cmd) {
40d4e3df
ED
2151 case SIOCOUTQ:
2152 {
2153 int amount = sk_wmem_alloc_get(sk);
31e6d363 2154
40d4e3df
ED
2155 return put_user(amount, (int __user *)arg);
2156 }
2157 case SIOCINQ:
2158 {
2159 struct sk_buff *skb;
2160 int amount = 0;
2161
2162 spin_lock_bh(&sk->sk_receive_queue.lock);
2163 skb = skb_peek(&sk->sk_receive_queue);
2164 if (skb)
2165 amount = skb->len;
2166 spin_unlock_bh(&sk->sk_receive_queue.lock);
2167 return put_user(amount, (int __user *)arg);
2168 }
2169 case SIOCGSTAMP:
2170 return sock_get_timestamp(sk, (struct timeval __user *)arg);
2171 case SIOCGSTAMPNS:
2172 return sock_get_timestampns(sk, (struct timespec __user *)arg);
1ce4f28b 2173
1da177e4 2174#ifdef CONFIG_INET
40d4e3df
ED
2175 case SIOCADDRT:
2176 case SIOCDELRT:
2177 case SIOCDARP:
2178 case SIOCGARP:
2179 case SIOCSARP:
2180 case SIOCGIFADDR:
2181 case SIOCSIFADDR:
2182 case SIOCGIFBRDADDR:
2183 case SIOCSIFBRDADDR:
2184 case SIOCGIFNETMASK:
2185 case SIOCSIFNETMASK:
2186 case SIOCGIFDSTADDR:
2187 case SIOCSIFDSTADDR:
2188 case SIOCSIFFLAGS:
2189 if (!net_eq(sock_net(sk), &init_net))
2190 return -ENOIOCTLCMD;
2191 return inet_dgram_ops.ioctl(sock, cmd, arg);
1da177e4
LT
2192#endif
2193
40d4e3df
ED
2194 default:
2195 return -ENOIOCTLCMD;
1da177e4
LT
2196 }
2197 return 0;
2198}
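
Note that SIOCINQ here peeks at the queue head, so it reports the length of the next frame rather than the total bytes buffered, while SIOCOUTQ reports transmit bytes still accounted to the socket's send buffer. A small sketch:

#include <sys/ioctl.h>
#include <linux/sockios.h>

/* Sketch: length of the next frame queued on a packet socket. */
static int next_frame_len(int fd)
{
	int n = 0;

	if (ioctl(fd, SIOCINQ, &n) < 0)
		return -1;
	return n;
}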
2199
2200#ifndef CONFIG_PACKET_MMAP
2201#define packet_mmap sock_no_mmap
2202#define packet_poll datagram_poll
2203#else
2204
40d4e3df 2205static unsigned int packet_poll(struct file *file, struct socket *sock,
1da177e4
LT
2206 poll_table *wait)
2207{
2208 struct sock *sk = sock->sk;
2209 struct packet_sock *po = pkt_sk(sk);
2210 unsigned int mask = datagram_poll(file, sock, wait);
2211
2212 spin_lock_bh(&sk->sk_receive_queue.lock);
69e3c75f
JB
2213 if (po->rx_ring.pg_vec) {
2214 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
1da177e4
LT
2215 mask |= POLLIN | POLLRDNORM;
2216 }
2217 spin_unlock_bh(&sk->sk_receive_queue.lock);
69e3c75f
JB
2218 spin_lock_bh(&sk->sk_write_queue.lock);
2219 if (po->tx_ring.pg_vec) {
2220 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2221 mask |= POLLOUT | POLLWRNORM;
2222 }
2223 spin_unlock_bh(&sk->sk_write_queue.lock);
1da177e4
LT
2224 return mask;
2225}
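
packet_poll layers ring state onto datagram_poll: POLLIN is asserted once the slot before the current RX head is no longer kernel-owned (a filled frame exists), and POLLOUT once the current TX slot is available, so a ring consumer can block in poll() instead of issuing a syscall per frame. The matching userspace wait might look like this (the infinite timeout is an assumed policy):

#include <poll.h>

/* Sketch: sleep until the RX ring holds a user-owned frame. */
static int wait_for_rx_frame(int fd)
{
	struct pollfd pfd = {
		.fd = fd,
		.events = POLLIN | POLLRDNORM,
	};

	return poll(&pfd, 1, -1);	/* < 0 on error, else ring is ready */
}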
2226
2227
2228/* Dirty? Well, I have yet to find a better way to account
2229 * for user mmaps.
2230 */
2231
2232static void packet_mm_open(struct vm_area_struct *vma)
2233{
2234 struct file *file = vma->vm_file;
40d4e3df 2235 struct socket *sock = file->private_data;
1da177e4 2236 struct sock *sk = sock->sk;
1ce4f28b 2237
1da177e4
LT
2238 if (sk)
2239 atomic_inc(&pkt_sk(sk)->mapped);
2240}
2241
2242static void packet_mm_close(struct vm_area_struct *vma)
2243{
2244 struct file *file = vma->vm_file;
40d4e3df 2245 struct socket *sock = file->private_data;
1da177e4 2246 struct sock *sk = sock->sk;
1ce4f28b 2247
1da177e4
LT
2248 if (sk)
2249 atomic_dec(&pkt_sk(sk)->mapped);
2250}
2251
f0f37e2f 2252static const struct vm_operations_struct packet_mmap_ops = {
40d4e3df
ED
2253 .open = packet_mm_open,
2254 .close = packet_mm_close,
1da177e4
LT
2255};
2256
4ebf0ae2 2257static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
1da177e4
LT
2258{
2259 int i;
2260
4ebf0ae2
DM
2261 for (i = 0; i < len; i++) {
2262 if (likely(pg_vec[i]))
2263 free_pages((unsigned long) pg_vec[i], order);
1da177e4
LT
2264 }
2265 kfree(pg_vec);
2266}
2267
4ebf0ae2
DM
2268static inline char *alloc_one_pg_vec_page(unsigned long order)
2269{
719bfeaa
ED
2270 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
2271
2272 return (char *) __get_free_pages(gfp_flags, order);
4ebf0ae2
DM
2273}
2274
2275static char **alloc_pg_vec(struct tpacket_req *req, int order)
2276{
2277 unsigned int block_nr = req->tp_block_nr;
2278 char **pg_vec;
2279 int i;
2280
2281 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
2282 if (unlikely(!pg_vec))
2283 goto out;
2284
2285 for (i = 0; i < block_nr; i++) {
2286 pg_vec[i] = alloc_one_pg_vec_page(order);
2287 if (unlikely(!pg_vec[i]))
2288 goto out_free_pgvec;
2289 }
2290
2291out:
2292 return pg_vec;
2293
2294out_free_pgvec:
2295 free_pg_vec(pg_vec, order, block_nr);
2296 pg_vec = NULL;
2297 goto out;
2298}
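
Each block is one physically contiguous allocation of 2^order pages, where order = get_order(tp_block_size); __GFP_NOWARN keeps routine high-order failures quiet, and a partial failure unwinds safely through free_pg_vec because kzalloc() pre-zeroed the vector. For reference, a userspace rendering of the same rounding, assuming 4 KiB pages:

/* Sketch of get_order(): smallest order with (PAGE_SIZE << order) >= size. */
static unsigned int block_order(unsigned long size)
{
	unsigned int order = 0;

	while ((4096UL << order) < size)
		order++;
	return order;
}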
1da177e4 2299
69e3c75f
JB
2300static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2301 int closing, int tx_ring)
1da177e4
LT
2302{
2303 char **pg_vec = NULL;
2304 struct packet_sock *po = pkt_sk(sk);
0e11c91e 2305 int was_running, order = 0;
69e3c75f
JB
2306 struct packet_ring_buffer *rb;
2307 struct sk_buff_head *rb_queue;
0e11c91e 2308 __be16 num;
69e3c75f 2309 int err;
1ce4f28b 2310
69e3c75f
JB
2311 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2312 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
1da177e4 2313
69e3c75f
JB
2314 err = -EBUSY;
2315 if (!closing) {
2316 if (atomic_read(&po->mapped))
2317 goto out;
2318 if (atomic_read(&rb->pending))
2319 goto out;
2320 }
1da177e4 2321
69e3c75f
JB
2322 if (req->tp_block_nr) {
2323 /* Sanity tests and some calculations */
2324 err = -EBUSY;
2325 if (unlikely(rb->pg_vec))
2326 goto out;
1da177e4 2327
bbd6ef87
PM
2328 switch (po->tp_version) {
2329 case TPACKET_V1:
2330 po->tp_hdrlen = TPACKET_HDRLEN;
2331 break;
2332 case TPACKET_V2:
2333 po->tp_hdrlen = TPACKET2_HDRLEN;
2334 break;
2335 }
2336
69e3c75f 2337 err = -EINVAL;
4ebf0ae2 2338 if (unlikely((int)req->tp_block_size <= 0))
69e3c75f 2339 goto out;
4ebf0ae2 2340 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
69e3c75f 2341 goto out;
8913336a 2342 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
69e3c75f
JB
2343 po->tp_reserve))
2344 goto out;
4ebf0ae2 2345 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
69e3c75f 2346 goto out;
1da177e4 2347
69e3c75f
JB
2348 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2349 if (unlikely(rb->frames_per_block <= 0))
2350 goto out;
2351 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2352 req->tp_frame_nr))
2353 goto out;
1da177e4
LT
2354
2355 err = -ENOMEM;
4ebf0ae2
DM
2356 order = get_order(req->tp_block_size);
2357 pg_vec = alloc_pg_vec(req, order);
2358 if (unlikely(!pg_vec))
1da177e4 2359 goto out;
69e3c75f
JB
2360 }
2361 /* Done */
2362 else {
2363 err = -EINVAL;
4ebf0ae2 2364 if (unlikely(req->tp_frame_nr))
69e3c75f 2365 goto out;
1da177e4
LT
2366 }
2367
2368 lock_sock(sk);
2369
2370 /* Detach socket from network */
2371 spin_lock(&po->bind_lock);
2372 was_running = po->running;
2373 num = po->num;
2374 if (was_running) {
2375 __dev_remove_pack(&po->prot_hook);
2376 po->num = 0;
2377 po->running = 0;
2378 __sock_put(sk);
2379 }
2380 spin_unlock(&po->bind_lock);
1ce4f28b 2381
1da177e4
LT
2382 synchronize_net();
2383
2384 err = -EBUSY;
905db440 2385 mutex_lock(&po->pg_vec_lock);
1da177e4
LT
2386 if (closing || atomic_read(&po->mapped) == 0) {
2387 err = 0;
2388#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
69e3c75f
JB
2389 spin_lock_bh(&rb_queue->lock);
2390 pg_vec = XC(rb->pg_vec, pg_vec);
2391 rb->frame_max = (req->tp_frame_nr - 1);
2392 rb->head = 0;
2393 rb->frame_size = req->tp_frame_size;
2394 spin_unlock_bh(&rb_queue->lock);
2395
2396 order = XC(rb->pg_vec_order, order);
2397 req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
2398
2399 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2400 po->prot_hook.func = (po->rx_ring.pg_vec) ?
2401 tpacket_rcv : packet_rcv;
2402 skb_queue_purge(rb_queue);
1da177e4
LT
2403#undef XC
2404 if (atomic_read(&po->mapped))
40d4e3df
ED
2405 pr_err("packet_mmap: vma is busy: %d\n",
2406 atomic_read(&po->mapped));
1da177e4 2407 }
905db440 2408 mutex_unlock(&po->pg_vec_lock);
1da177e4
LT
2409
2410 spin_lock(&po->bind_lock);
2411 if (was_running && !po->running) {
2412 sock_hold(sk);
2413 po->running = 1;
2414 po->num = num;
2415 dev_add_pack(&po->prot_hook);
2416 }
2417 spin_unlock(&po->bind_lock);
2418
2419 release_sock(sk);
2420
1da177e4
LT
2421 if (pg_vec)
2422 free_pg_vec(pg_vec, order, req->tp_block_nr);
2423out:
2424 return err;
2425}
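
The sanity tests above admit only one family of ring shapes: a positive block size that is a multiple of PAGE_SIZE, a frame size of at least tp_hdrlen + tp_reserve aligned to TPACKET_ALIGNMENT, and a frame count that exactly tiles the blocks. For example, tp_block_size = 4096 with tp_frame_size = 2048 gives frames_per_block = 2, so tp_block_nr = 64 forces tp_frame_nr = 128. A sketch mirroring the checks (PAGE_SIZE assumed to be 4096):

#include <linux/if_packet.h>

/* Sketch of the geometry validation in packet_set_ring() above;
 * min_frame stands for tp_hdrlen + tp_reserve. */
static int ring_geometry_ok(const struct tpacket_req *req,
			    unsigned int min_frame)
{
	unsigned int fpb;

	if ((int)req->tp_block_size <= 0 ||
	    (req->tp_block_size & 4095) ||
	    req->tp_frame_size < min_frame ||
	    (req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
		return 0;

	fpb = req->tp_block_size / req->tp_frame_size;
	return fpb > 0 && fpb * req->tp_block_nr == req->tp_frame_nr;
}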
2426
69e3c75f
JB
2427static int packet_mmap(struct file *file, struct socket *sock,
2428 struct vm_area_struct *vma)
1da177e4
LT
2429{
2430 struct sock *sk = sock->sk;
2431 struct packet_sock *po = pkt_sk(sk);
69e3c75f
JB
2432 unsigned long size, expected_size;
2433 struct packet_ring_buffer *rb;
1da177e4
LT
2434 unsigned long start;
2435 int err = -EINVAL;
2436 int i;
2437
2438 if (vma->vm_pgoff)
2439 return -EINVAL;
2440
905db440 2441 mutex_lock(&po->pg_vec_lock);
69e3c75f
JB
2442
2443 expected_size = 0;
2444 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2445 if (rb->pg_vec) {
2446 expected_size += rb->pg_vec_len
2447 * rb->pg_vec_pages
2448 * PAGE_SIZE;
2449 }
2450 }
2451
2452 if (expected_size == 0)
1da177e4 2453 goto out;
69e3c75f
JB
2454
2455 size = vma->vm_end - vma->vm_start;
2456 if (size != expected_size)
1da177e4
LT
2457 goto out;
2458
1da177e4 2459 start = vma->vm_start;
69e3c75f
JB
2460 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2461 if (rb->pg_vec == NULL)
2462 continue;
2463
2464 for (i = 0; i < rb->pg_vec_len; i++) {
2465 struct page *page = virt_to_page(rb->pg_vec[i]);
2466 int pg_num;
2467
2468 for (pg_num = 0; pg_num < rb->pg_vec_pages;
40d4e3df 2469 pg_num++, page++) {
69e3c75f
JB
2470 err = vm_insert_page(vma, start, page);
2471 if (unlikely(err))
2472 goto out;
2473 start += PAGE_SIZE;
2474 }
4ebf0ae2 2475 }
1da177e4 2476 }
69e3c75f 2477
4ebf0ae2 2478 atomic_inc(&po->mapped);
1da177e4
LT
2479 vma->vm_ops = &packet_mmap_ops;
2480 err = 0;
2481
2482out:
905db440 2483 mutex_unlock(&po->pg_vec_lock);
1da177e4
LT
2484 return err;
2485}
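
One mmap() must cover everything at once: vm_pgoff has to be zero and the requested length must equal the combined size of whichever rings are configured, with the RX ring's pages inserted first and the TX ring's immediately after. A hedged sketch of the userspace side:

#include <stddef.h>
#include <sys/mman.h>

/* Sketch: map both rings in one call; total must equal
 * rx_block_nr * rx_block_size + tx_block_nr * tx_block_size. */
static void *map_rings(int fd, size_t total)
{
	void *base = mmap(NULL, total, PROT_READ | PROT_WRITE,
			  MAP_SHARED, fd, 0);

	return base == MAP_FAILED ? NULL : base;
}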
2486#endif
2487
2488
90ddc4f0 2489static const struct proto_ops packet_ops_spkt = {
1da177e4
LT
2490 .family = PF_PACKET,
2491 .owner = THIS_MODULE,
2492 .release = packet_release,
2493 .bind = packet_bind_spkt,
2494 .connect = sock_no_connect,
2495 .socketpair = sock_no_socketpair,
2496 .accept = sock_no_accept,
2497 .getname = packet_getname_spkt,
2498 .poll = datagram_poll,
2499 .ioctl = packet_ioctl,
2500 .listen = sock_no_listen,
2501 .shutdown = sock_no_shutdown,
2502 .setsockopt = sock_no_setsockopt,
2503 .getsockopt = sock_no_getsockopt,
2504 .sendmsg = packet_sendmsg_spkt,
2505 .recvmsg = packet_recvmsg,
2506 .mmap = sock_no_mmap,
2507 .sendpage = sock_no_sendpage,
2508};
1da177e4 2509
90ddc4f0 2510static const struct proto_ops packet_ops = {
1da177e4
LT
2511 .family = PF_PACKET,
2512 .owner = THIS_MODULE,
2513 .release = packet_release,
2514 .bind = packet_bind,
2515 .connect = sock_no_connect,
2516 .socketpair = sock_no_socketpair,
2517 .accept = sock_no_accept,
1ce4f28b 2518 .getname = packet_getname,
1da177e4
LT
2519 .poll = packet_poll,
2520 .ioctl = packet_ioctl,
2521 .listen = sock_no_listen,
2522 .shutdown = sock_no_shutdown,
2523 .setsockopt = packet_setsockopt,
2524 .getsockopt = packet_getsockopt,
2525 .sendmsg = packet_sendmsg,
2526 .recvmsg = packet_recvmsg,
2527 .mmap = packet_mmap,
2528 .sendpage = sock_no_sendpage,
2529};
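
These two tables mark the split between the legacy SOCK_PACKET interface (packet_ops_spkt: no socket options, no mmap) and full PF_PACKET sockets, which wire in every facility defined above. Opening one requires CAP_NET_RAW; a minimal sketch:

#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>

/* Sketch: a socket served by packet_ops above (CAP_NET_RAW required). */
static int open_packet_socket(void)
{
	return socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
}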
2530
ec1b4cf7 2531static const struct net_proto_family packet_family_ops = {
1da177e4
LT
2532 .family = PF_PACKET,
2533 .create = packet_create,
2534 .owner = THIS_MODULE,
2535};
2536
2537static struct notifier_block packet_netdev_notifier = {
40d4e3df 2538 .notifier_call = packet_notifier,
1da177e4
LT
2539};
2540
2541#ifdef CONFIG_PROC_FS
d12d01d6 2542static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
1da177e4
LT
2543{
2544 struct sock *s;
2545 struct hlist_node *node;
2546
2aaef4e4 2547 sk_for_each(s, node, &net->packet.sklist) {
1da177e4
LT
2548 if (!off--)
2549 return s;
2550 }
2551 return NULL;
2552}
2553
2554static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
40ccbf52 2555 __acquires(seq_file_net(seq)->packet.sklist_lock)
1da177e4 2556{
e372c414 2557 struct net *net = seq_file_net(seq);
2aaef4e4 2558 read_lock(&net->packet.sklist_lock);
d12d01d6 2559 return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
1da177e4
LT
2560}
2561
2562static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2563{
1bf40954 2564 struct net *net = seq_file_net(seq);
1da177e4 2565 ++*pos;
1ce4f28b 2566 return (v == SEQ_START_TOKEN)
2aaef4e4 2567 ? sk_head(&net->packet.sklist)
40d4e3df 2568 : sk_next((struct sock *)v);
1da177e4
LT
2569}
2570
2571static void packet_seq_stop(struct seq_file *seq, void *v)
40ccbf52 2572 __releases(seq_file_net(seq)->packet.sklist_lock)
1da177e4 2573{
1bf40954 2574 struct net *net = seq_file_net(seq);
2aaef4e4 2575 read_unlock(&net->packet.sklist_lock);
1da177e4
LT
2576}
2577
1ce4f28b 2578static int packet_seq_show(struct seq_file *seq, void *v)
1da177e4
LT
2579{
2580 if (v == SEQ_START_TOKEN)
2581 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2582 else {
2583 struct sock *s = v;
2584 const struct packet_sock *po = pkt_sk(s);
2585
2586 seq_printf(seq,
2587 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2588 s,
2589 atomic_read(&s->sk_refcnt),
2590 s->sk_type,
2591 ntohs(po->num),
2592 po->ifindex,
2593 po->running,
2594 atomic_read(&s->sk_rmem_alloc),
2595 sock_i_uid(s),
40d4e3df 2596 sock_i_ino(s));
1da177e4
LT
2597 }
2598
2599 return 0;
2600}
2601
56b3d975 2602static const struct seq_operations packet_seq_ops = {
1da177e4
LT
2603 .start = packet_seq_start,
2604 .next = packet_seq_next,
2605 .stop = packet_seq_stop,
2606 .show = packet_seq_show,
2607};
2608
2609static int packet_seq_open(struct inode *inode, struct file *file)
2610{
e372c414
DL
2611 return seq_open_net(inode, file, &packet_seq_ops,
2612 sizeof(struct seq_net_private));
1da177e4
LT
2613}
2614
da7071d7 2615static const struct file_operations packet_seq_fops = {
1da177e4
LT
2616 .owner = THIS_MODULE,
2617 .open = packet_seq_open,
2618 .read = seq_read,
2619 .llseek = seq_lseek,
e372c414 2620 .release = seq_release_net,
1da177e4
LT
2621};
2622
2623#endif
2624
2c8c1e72 2625static int __net_init packet_net_init(struct net *net)
d12d01d6 2626{
2aaef4e4
DL
2627 rwlock_init(&net->packet.sklist_lock);
2628 INIT_HLIST_HEAD(&net->packet.sklist);
d12d01d6
DL
2629
2630 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2631 return -ENOMEM;
2632
2633 return 0;
2634}
2635
2c8c1e72 2636static void __net_exit packet_net_exit(struct net *net)
d12d01d6
DL
2637{
2638 proc_net_remove(net, "packet");
2639}
2640
2641static struct pernet_operations packet_net_ops = {
2642 .init = packet_net_init,
2643 .exit = packet_net_exit,
2644};
2645
2646
1da177e4
LT
2647static void __exit packet_exit(void)
2648{
1da177e4 2649 unregister_netdevice_notifier(&packet_netdev_notifier);
d12d01d6 2650 unregister_pernet_subsys(&packet_net_ops);
1da177e4
LT
2651 sock_unregister(PF_PACKET);
2652 proto_unregister(&packet_proto);
2653}
2654
2655static int __init packet_init(void)
2656{
2657 int rc = proto_register(&packet_proto, 0);
2658
2659 if (rc != 0)
2660 goto out;
2661
2662 sock_register(&packet_family_ops);
d12d01d6 2663 register_pernet_subsys(&packet_net_ops);
1da177e4 2664 register_netdevice_notifier(&packet_netdev_notifier);
1da177e4
LT
2665out:
2666 return rc;
2667}
2668
2669module_init(packet_init);
2670module_exit(packet_exit);
2671MODULE_LICENSE("GPL");
2672MODULE_ALIAS_NETPROTO(PF_PACKET);