/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system. INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Version:	$Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
 *		Alan Cox	:	Protocol setting support
 *		Alexey Kuznetsov:	Untied from IPv4 stack.
 *		Cyrus Durgin	:	Fixed kerneld for kmod.
 *		Michal Ostrowski:	Module initialization cleanup.
 *		Ulises Alonso	:	Frame number limit removal and
 *					packet_set_ring memory leak.
 *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
 *					The convention is that longer addresses
 *					will simply extend the hardware address
 *					byte arrays at the end of sockaddr_ll
 *					and packet_mreq.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 */

#include <linux/types.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

#define CONFIG_SOCK_PACKET	1

/*
   Proposed replacement for SIOC{ADD,DEL}MULTI and
   IFF_PROMISC, IFF_ALLMULTI flags.

   It is more expensive, but I believe it is the really correct
   solution: reentrant, safe and fault tolerant.

   IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping a
   reference count and a global flag, so that the real status is
   (gflag|(count != 0)); this way we can use the obsolete faulty
   interface without harming clever users.
 */
#define CONFIG_PACKET_MULTICAST	1

/*
   Assumptions:
   - if a device has no dev->hard_header routine, it adds and removes the ll
     header inside itself. In this case the ll header is invisible outside of
     the device, but higher levels should still reserve dev->hard_header_len.
     Some devices are clever enough to reallocate the skb when the header
     will not fit into the reserved space (tunnels), others are silly
     (PPP).
   - a packet socket receives packets with the ll header pulled,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> data

Outgoing, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> ll header

Incoming, dev->hard_header==NULL
   mac.raw -> UNKNOWN position. It is very likely that it points to the ll
	      header. PPP does this, which is wrong, because it introduces
	      asymmetry between rx and tx paths.
   data    -> data

Outgoing, dev->hard_header==NULL
   mac.raw -> data. ll header is still not built!
   data    -> data

Summary:
   If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac.raw -> ll header
   data    -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac.raw -> data
   data    -> data

   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
 */

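/*
 * A concrete example of the invariants above (Ethernet; illustrative,
 * not from the original source): on receive with dev->hard_header set,
 * mac.raw points at the 14-byte Ethernet header and data points just
 * past it, so the SOCK_RAW path pushes those 14 bytes back before the
 * frame is queued to the user.
 */
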
/* List of all packet sockets. */
static HLIST_HEAD(packet_sklist);
static DEFINE_RWLOCK(packet_sklist_lock);

static atomic_t packet_socks_nr;


/* Private packet socket structures. */

#ifdef CONFIG_PACKET_MULTICAST
struct packet_mclist
{
	struct packet_mclist	*next;
	int			ifindex;
	int			count;
	unsigned short		type;
	unsigned short		alen;
	unsigned char		addr[MAX_ADDR_LEN];
};
/* identical to struct packet_mreq except it has
 * a longer address field.
 */
struct packet_mreq_max
{
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};
#endif
#ifdef CONFIG_PACKET_MMAP
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

static void packet_flush_mclist(struct sock *sk);

struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;
#ifdef CONFIG_PACKET_MMAP
	char			**pg_vec;
	unsigned int		head;
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;
	int			copy_thresh;
#endif
	struct packet_type	prot_hook;
	spinlock_t		bind_lock;
	unsigned int		running:1,	/* prot_hook is attached */
				auxdata:1;
	int			ifindex;	/* bound device */
	__be16			num;
#ifdef CONFIG_PACKET_MULTICAST
	struct packet_mclist	*mclist;
#endif
#ifdef CONFIG_PACKET_MMAP
	atomic_t		mapped;
	unsigned int		pg_vec_order;
	unsigned int		pg_vec_pages;
	unsigned int		pg_vec_len;
#endif
};

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

#ifdef CONFIG_PACKET_MMAP

static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
{
	unsigned int pg_vec_pos, frame_offset;
	char *frame;

	pg_vec_pos = position / po->frames_per_block;
	frame_offset = position % po->frames_per_block;

	frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);

	return frame;
}
#endif
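
/*
 * Worked example of the lookup arithmetic above (illustrative numbers,
 * not from the source): with frames_per_block = 4 and frame_size = 2048,
 * frame 6 lives in block 6 / 4 = 1 at offset 6 % 4 = 2, i.e. at address
 * pg_vec[1] + 2 * 2048 = pg_vec[1] + 4096.
 */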

static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}

static void packet_sock_destruct(struct sock *sk)
{
	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		printk("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	atomic_dec(&packet_socks_nr);
#ifdef PACKET_REFCNT_DEBUG
	printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
#endif
}


static const struct proto_ops packet_ops;

#ifdef CONFIG_SOCK_PACKET
static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 * When we registered the protocol we saved the socket in the data
	 * field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 * Yank back the headers [hope the device set this
	 * right or kerboom...]
	 *
	 * Incoming packets have the ll header pulled,
	 * push it back.
	 *
	 * For outgoing ones skb->data == skb->mac.raw,
	 * so this procedure is a no-op.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto oom;

	/* drop any routing info */
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb->mac.raw);

	/*
	 * The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 * Charge the memory to the socket. This is done specifically
	 * to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}


/*
 * Output a raw packet to a device layer. This bypasses all the other
 * protocol layers and you must therefore supply it with a complete frame.
 */

static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto = 0;
	int err;

	/*
	 * Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 * Find the device first to size check it.
	 */

	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name(saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	err = -ENOBUFS;
	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

	/*
	 * If the write buffer is full, then tough. At this level the user gets to
	 * deal with the problem - do your own algorithmic backoffs. That's far
	 * more flexible.
	 */

	if (skb == NULL)
		goto out_unlock;

	/*
	 * Fill it in.
	 */

	/* FIXME: Save some space for broken drivers that write a
	 * hard header at transmission time by themselves. PPP is the
	 * notable one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb->nh.raw = skb->data;

	/* Try to align data part correctly */
	if (dev->hard_header) {
		skb->data -= dev->hard_header_len;
		skb->tail -= dev->hard_header_len;
		if (len < dev->hard_header_len)
			skb->nh.raw = skb->data;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	if (err)
		goto out_free;

	/*
	 * Now send it.
	 */

	dev_queue_xmit(skb);
	dev_put(dev);
	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
	return err;
}
#endif

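/*
 * A minimal userspace sketch of the legacy SOCK_PACKET send path above
 * (illustrative only; the interface name and protocol are assumptions):
 *
 *	int fd = socket(PF_PACKET, SOCK_PACKET, htons(ETH_P_ALL));
 *	struct sockaddr_pkt spkt = { .spkt_family = AF_PACKET };
 *	strncpy(spkt.spkt_device, "eth0", sizeof(spkt.spkt_device));
 *	spkt.spkt_protocol = htons(ETH_P_IP);
 *	// frame[] must hold a complete link-layer frame, header included
 *	sendto(fd, frame, frame_len, 0,
 *	       (struct sockaddr *)&spkt, sizeof(spkt));
 */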
static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock_bh();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = sk_run_filter(skb, filter->insns, filter->len);
	rcu_read_unlock_bh();

	return res;
}

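/*
 * The filter consulted above is the classic BPF program a user may have
 * attached with SO_ATTACH_FILTER. A minimal sketch (the "ARP only"
 * program is an assumption for illustration):
 *
 *	struct sock_filter insns[] = {
 *		BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 12),	  // load ethertype
 *		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ETH_P_ARP, 0, 1),
 *		BPF_STMT(BPF_RET | BPF_K, 0xffff),	  // accept whole packet
 *		BPF_STMT(BPF_RET | BPF_K, 0),		  // drop
 *	};
 *	struct sock_fprog prog = { .len = 4, .filter = insns };
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
 */
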
/*
   This function makes lazy skb cloning in the hope that most packets
   are discarded by BPF.

   Note the tricky part: we DO mangle a shared skb! skb->data, skb->len
   and skb->cb are mangled. It works because (and until) packets
   falling here are owned by the current CPU. Output packets are cloned
   by dev_queue_xmit_nit(), input packets are processed by net_bh
   sequentially, so that if we return the skb to its original state on
   exit, we will not harm anyone.
 */

static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	skb->dev = dev;

	if (dev->hard_header) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;
	sll->sll_halen = 0;

	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;
}

#ifdef CONFIG_PACKET_MMAP
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	struct tpacket_hdr *h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff;
	struct sk_buff *copy_skb = NULL;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (dev->hard_header) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
	} else {
		unsigned maclen = skb->nh.raw - skb->data;
		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->frame_size) {
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);

	if (h->tp_status)
		goto ring_is_full;
	po->head = po->head != po->frame_max ? po->head + 1 : 0;
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, (u8 *)h + macoff, snaplen);

	h->tp_len = skb->len;
	h->tp_snaplen = snaplen;
	h->tp_mac = macoff;
	h->tp_net = netoff;
	if (skb->tstamp.off_sec == 0) {
		__net_timestamp(skb);
		sock_enable_timestamp(sk);
	}
	h->tp_sec = skb->tstamp.off_sec;
	h->tp_usec = skb->tstamp.off_usec;

	sll = (struct sockaddr_ll *)((u8 *)h + TPACKET_ALIGN(sizeof(*h)));
	sll->sll_halen = 0;
	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;

	h->tp_status = status;
	smp_mb();

	{
		struct page *p_start, *p_end;
		u8 *h_end = (u8 *)h + macoff + snaplen - 1;

		p_start = virt_to_page(h);
		p_end = virt_to_page(h_end);
		while (p_start <= p_end) {
			flush_dcache_page(p_start);
			p_start++;
		}
	}

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	if (copy_skb)
		kfree_skb(copy_skb);
	goto drop_n_restore;
}

#endif

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;

	/*
	 * Get and verify the address.
	 */

	if (saddr == NULL) {
		struct packet_sock *po = pkt_sk(sk);

		ifindex = po->ifindex;
		proto = po->num;
		addr = NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex = saddr->sll_ifindex;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
	}


	dev = dev_get_by_index(ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	err = -EMSGSIZE;
	if (len > dev->mtu + reserve)
		goto out_unlock;

	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
				  msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb->nh.raw = skb->data;

	if (dev->hard_header) {
		int res;
		err = -EINVAL;
		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
		if (sock->type != SOCK_DGRAM) {
			skb->tail = skb->data;
			skb->len = 0;
		} else if (res < 0)
			goto out_free;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;

	/*
	 * Now send it.
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}

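/*
 * A minimal userspace sketch of the send path above for a SOCK_DGRAM
 * packet socket, where the kernel builds the ll header from sll_addr
 * (the ifindex and destination MAC are assumptions for illustration):
 *
 *	int fd = socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
 *	struct sockaddr_ll sll = {
 *		.sll_family	= AF_PACKET,
 *		.sll_protocol	= htons(ETH_P_IP),
 *		.sll_ifindex	= 2,		// e.g. eth0
 *		.sll_halen	= ETH_ALEN,
 *	};
 *	memcpy(sll.sll_addr, dst_mac, ETH_ALEN);
 *	sendto(fd, payload, payload_len, 0,
 *	       (struct sockaddr *)&sll, sizeof(sll));
 */
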
/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;

	if (!sk)
		return 0;

	po = pkt_sk(sk);

	write_lock_bh(&packet_sklist_lock);
	sk_del_node_init(sk);
	write_unlock_bh(&packet_sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (po->running) {
		/*
		 *	Remove the protocol hook.
		 */
		dev_remove_pack(&po->prot_hook);
		po->running = 0;
		po->num = 0;
		__sock_put(sk);
	}

#ifdef CONFIG_PACKET_MULTICAST
	packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
	if (po->pg_vec) {
		struct tpacket_req req;
		memset(&req, 0, sizeof(req));
		packet_set_ring(sk, &req, 1);
	}
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */

static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (dev) {
		if (dev->flags & IFF_UP) {
			dev_add_pack(&po->prot_hook);
			sock_hold(sk);
			po->running = 1;
		} else {
			sk->sk_err = ENETDOWN;
			if (!sock_flag(sk, SOCK_DEAD))
				sk->sk_error_report(sk);
		}
	} else {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */

#ifdef CONFIG_SOCK_PACKET

static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(name);
	if (dev) {
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
		dev_put(dev);
	}
	return err;
}
#endif

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;


	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
	if (dev)
		dev_put(dev);

out:
	return err;
}

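/*
 * A minimal userspace sketch of binding a packet socket to one
 * interface via packet_bind() above (the interface name is an
 * assumption for illustration):
 *
 *	int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct sockaddr_ll sll = {
 *		.sll_family	= AF_PACKET,
 *		.sll_protocol	= htons(ETH_P_ALL),
 *		.sll_ifindex	= if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */
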
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};

/*
 *	Create a packet of type SOCK_PACKET.
 */

static int packet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
	    && sock->type != SOCK_PACKET
#endif
	    )
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;
#endif
	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	atomic_inc(&packet_socks_nr);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	po->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;
#endif
	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	write_lock_bh(&packet_sklist_lock);
	sk_add_node(sk, &packet_sklist);
	write_unlock_bh(&packet_sklist_lock);
	return 0;
out:
	return err;
}

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if the device has just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking, we don't have to see or worry about
	 *	blocking retries.
	 */

	if (skb == NULL)
		goto out;

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_timestamp(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb->nh.raw - skb->data;

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = (flags & MSG_TRUNC) ? skb->len : copied;

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}

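/*
 * A minimal userspace sketch of picking up the PACKET_AUXDATA control
 * message produced above (buffer sizes are assumptions for illustration):
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &on, sizeof(on));
 *
 *	char frame[2048], ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	struct iovec iov = { .iov_base = frame, .iov_len = sizeof(frame) };
 *	struct msghdr mh = { .msg_iov = &iov, .msg_iovlen = 1,
 *			     .msg_control = ctrl,
 *			     .msg_controllen = sizeof(ctrl) };
 *	recvmsg(fd, &mh, 0);
 *	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mh);
 *	if (cmsg && cmsg->cmsg_level == SOL_PACKET &&
 *	    cmsg->cmsg_type == PACKET_AUXDATA) {
 *		struct tpacket_auxdata *aux = (void *)CMSG_DATA(cmsg);
 *		// aux->tp_len is the original length before snapping
 *	}
 */
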
#ifdef CONFIG_SOCK_PACKET
static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	dev = dev_get_by_index(pkt_sk(sk)->ifindex);
	if (dev) {
		strlcpy(uaddr->sa_data, dev->name, 15);
		dev_put(dev);
	} else
		memset(uaddr->sa_data, 0, 14);
	*uaddr_len = sizeof(*uaddr);

	return 0;
}
#endif

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	dev = dev_get_by_index(po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
		dev_put(dev);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

#ifdef CONFIG_PACKET_MULTICAST
static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (what > 0)
			dev_mc_add(dev, i->addr, i->alen, 0);
		else
			dev_mc_delete(dev, i->addr, i->alen, 0);
		break;
	case PACKET_MR_PROMISC:
		dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		dev_set_allmulti(dev, what);
		break;
	default:
		break;
	}
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	packet_dev_mc(dev, i, +1);

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = dev_get_by_index(ml->ifindex);
				if (dev) {
					packet_dev_mc(dev, ml, -1);
					dev_put(dev);
				}
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
			packet_dev_mc(dev, ml, -1);
			dev_put(dev);
		}
		kfree(ml);
	}
	rtnl_unlock();
}
#endif

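/*
 * A minimal userspace sketch of the membership interface handled by
 * packet_setsockopt() below (the interface name is an assumption for
 * illustration):
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = if_nametoindex("eth0"),
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 */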
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
#ifdef CONFIG_PACKET_MULTICAST
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}
#endif
#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	{
		struct tpacket_req req;

		if (optlen < sizeof(req))
			return -EINVAL;
		if (copy_from_user(&req, optval, sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
#endif
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}

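/*
 * A minimal userspace sketch of reading (and thereby resetting) the
 * counters exposed above:
 *
 *	struct tpacket_stats st;
 *	socklen_t len = sizeof(st);
 *	if (getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &len) == 0)
 *		printf("%u packets, %u drops\n", st.tp_packets, st.tp_drops);
 */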

static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = (struct net_device *)data;

	read_lock(&packet_sklist_lock);
	sk_for_each(sk, node, &packet_sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
#ifdef CONFIG_PACKET_MULTICAST
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */
#endif
		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			if (dev->ifindex == po->ifindex && po->num &&
			    !po->running) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
			break;
		}
	}
	read_unlock(&packet_sklist_lock);
	return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = atomic_read(&sk->sk_wmem_alloc);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}

#ifndef CONFIG_PACKET_MMAP
#define packet_mmap sock_no_mmap
#define packet_poll datagram_poll
#else

static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->pg_vec) {
		unsigned last = po->head ? po->head - 1 : po->frame_max;
		struct tpacket_hdr *h;

		h = (struct tpacket_hdr *)packet_lookup_frame(po, last);

		if (h->tp_status)
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	return mask;
}


/* Dirty? Well, I still did not learn a better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};

static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
{
	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
}

static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i]))
			free_pages((unsigned long) pg_vec[i], order);
	}
	kfree(pg_vec);
}

static inline char *alloc_one_pg_vec_page(unsigned long order)
{
	return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
					 order);
}

static char **alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	char **pg_vec;
	int i;

	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i] = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i]))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}

static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	__be16 num;
	int err = 0;

	if (req->tp_block_nr) {
		int i, l;

		/* Sanity tests and some calculations */

		if (unlikely(po->pg_vec))
			return -EBUSY;

		if (unlikely((int)req->tp_block_size <= 0))
			return -EINVAL;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			return -EINVAL;
		if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
			return -EINVAL;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			return -EINVAL;

		po->frames_per_block = req->tp_block_size / req->tp_frame_size;
		if (unlikely(po->frames_per_block <= 0))
			return -EINVAL;
		if (unlikely((po->frames_per_block * req->tp_block_nr) !=
			     req->tp_frame_nr))
			return -EINVAL;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;

		l = 0;
		for (i = 0; i < req->tp_block_nr; i++) {
			char *ptr = pg_vec[i];
			struct tpacket_hdr *header;
			int k;

			for (k = 0; k < po->frames_per_block; k++) {
				header = (struct tpacket_hdr *) ptr;
				header->tp_status = TP_STATUS_KERNEL;
				ptr += req->tp_frame_size;
			}
		}
		/* Done */
	} else {
		if (unlikely(req->tp_frame_nr))
			return -EINVAL;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

		spin_lock_bh(&sk->sk_receive_queue.lock);
		pg_vec = XC(po->pg_vec, pg_vec);
		po->frame_max = (req->tp_frame_nr - 1);
		po->head = 0;
		po->frame_size = req->tp_frame_size;
		spin_unlock_bh(&sk->sk_receive_queue.lock);

		order = XC(po->pg_vec_order, order);
		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

		po->pg_vec_pages = req->tp_block_size / PAGE_SIZE;
		po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
		skb_queue_purge(&sk->sk_receive_queue);
#undef XC
		if (atomic_read(&po->mapped))
			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
	}

	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}

static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	size = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	if (po->pg_vec == NULL)
		goto out;
	if (size != po->pg_vec_len * po->pg_vec_pages * PAGE_SIZE)
		goto out;

	start = vma->vm_start;
	for (i = 0; i < po->pg_vec_len; i++) {
		struct page *page = virt_to_page(po->pg_vec[i]);
		int pg_num;

		for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
			err = vm_insert_page(vma, start, page);
			if (unlikely(err))
				goto out;
			start += PAGE_SIZE;
		}
	}
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	release_sock(sk);
	return err;
}
#endif
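
/*
 * A minimal userspace sketch of consuming the ring set up by
 * packet_set_ring() and mapped by packet_mmap() above (all sizes are
 * assumptions for illustration; they must satisfy the sanity checks
 * in packet_set_ring()):
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,
 *		.tp_frame_nr   = 128,	// (4096 / 2048) * 64
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 *	char *ring = mmap(NULL, req.tp_block_size * req.tp_block_nr,
 *			  PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *
 *	for (unsigned i = 0; ; i = (i + 1) % req.tp_frame_nr) {
 *		// same addressing as packet_lookup_frame(): 2 frames/block
 *		struct tpacket_hdr *h = (void *)(ring +
 *			(i / 2) * req.tp_block_size +
 *			(i % 2) * req.tp_frame_size);
 *		while (!(h->tp_status & TP_STATUS_USER))
 *			poll(&(struct pollfd){ .fd = fd,
 *					       .events = POLLIN }, 1, -1);
 *		// frame data starts at (char *)h + h->tp_mac
 *		h->tp_status = TP_STATUS_KERNEL;	// hand frame back
 *	}
 */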


#ifdef CONFIG_SOCK_PACKET
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
#endif

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner =	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call = packet_notifier,
};

#ifdef CONFIG_PROC_FS
static inline struct sock *packet_seq_idx(loff_t off)
{
	struct sock *s;
	struct hlist_node *node;

	sk_for_each(s, node, &packet_sklist) {
		if (!off--)
			return s;
	}
	return NULL;
}

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&packet_sklist_lock);
	return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return (v == SEQ_START_TOKEN)
		? sk_head(&packet_sklist)
		: sk_next((struct sock *)v);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&packet_sklist_lock);
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = v;
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}

static struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &packet_seq_ops);
}

static struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

#endif

static void __exit packet_exit(void)
{
	proc_net_remove("packet");
	unregister_netdevice_notifier(&packet_netdev_notifier);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
	proc_net_fops_create("packet", 0, &packet_seq_fops);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);