]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/socket.c
tg3: Update version to 3.112
[net-next-2.6.git] / net / socket.c
CommitLineData
1da177e4
LT
1/*
2 * NET An implementation of the SOCKET network access protocol.
3 *
4 * Version: @(#)socket.c 1.1.93 18/02/95
5 *
6 * Authors: Orest Zborowski, <obz@Kodak.COM>
02c30a84 7 * Ross Biro
1da177e4
LT
8 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9 *
10 * Fixes:
11 * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12 * shutdown()
13 * Alan Cox : verify_area() fixes
14 * Alan Cox : Removed DDI
15 * Jonathan Kamens : SOCK_DGRAM reconnect bug
16 * Alan Cox : Moved a load of checks to the very
17 * top level.
18 * Alan Cox : Move address structures to/from user
19 * mode above the protocol layers.
20 * Rob Janssen : Allow 0 length sends.
21 * Alan Cox : Asynchronous I/O support (cribbed from the
22 * tty drivers).
23 * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24 * Jeff Uphoff : Made max number of sockets command-line
25 * configurable.
26 * Matti Aarnio : Made the number of sockets dynamic,
27 * to be allocated when needed, and mr.
28 * Uphoff's max is used as max to be
29 * allowed to allocate.
30 * Linus : Argh. removed all the socket allocation
31 * altogether: it's in the inode now.
32 * Alan Cox : Made sock_alloc()/sock_release() public
33 * for NetROM and future kernel nfsd type
34 * stuff.
35 * Alan Cox : sendmsg/recvmsg basics.
36 * Tom Dyas : Export net symbols.
37 * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38 * Alan Cox : Added thread locking to sys_* calls
39 * for sockets. May have errors at the
40 * moment.
41 * Kevin Buhr : Fixed the dumb errors in the above.
42 * Andi Kleen : Some small cleanups, optimizations,
43 * and fixed a copy_from_user() bug.
44 * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
89bddce5 45 * Tigran Aivazian : Made listen(2) backlog sanity checks
1da177e4
LT
46 * protocol-independent
47 *
48 *
49 * This program is free software; you can redistribute it and/or
50 * modify it under the terms of the GNU General Public License
51 * as published by the Free Software Foundation; either version
52 * 2 of the License, or (at your option) any later version.
53 *
54 *
55 * This module is effectively the top level interface to the BSD socket
89bddce5 56 * paradigm.
1da177e4
LT
57 *
58 * Based upon Swansea University Computer Society NET3.039
59 */
60
1da177e4 61#include <linux/mm.h>
1da177e4
LT
62#include <linux/socket.h>
63#include <linux/file.h>
64#include <linux/net.h>
65#include <linux/interrupt.h>
aaca0bdc 66#include <linux/thread_info.h>
55737fda 67#include <linux/rcupdate.h>
1da177e4
LT
68#include <linux/netdevice.h>
69#include <linux/proc_fs.h>
70#include <linux/seq_file.h>
4a3e2f71 71#include <linux/mutex.h>
1da177e4
LT
72#include <linux/wanrouter.h>
73#include <linux/if_bridge.h>
20380731
ACM
74#include <linux/if_frad.h>
75#include <linux/if_vlan.h>
1da177e4
LT
76#include <linux/init.h>
77#include <linux/poll.h>
78#include <linux/cache.h>
79#include <linux/module.h>
80#include <linux/highmem.h>
1da177e4
LT
81#include <linux/mount.h>
82#include <linux/security.h>
83#include <linux/syscalls.h>
84#include <linux/compat.h>
85#include <linux/kmod.h>
3ec3b2fb 86#include <linux/audit.h>
d86b5e0e 87#include <linux/wireless.h>
1b8d7ae4 88#include <linux/nsproxy.h>
1fd7317d 89#include <linux/magic.h>
5a0e3ad6 90#include <linux/slab.h>
1da177e4
LT
91
92#include <asm/uaccess.h>
93#include <asm/unistd.h>
94
95#include <net/compat.h>
87de87d5 96#include <net/wext.h>
f8451725 97#include <net/cls_cgroup.h>
1da177e4
LT
98
99#include <net/sock.h>
100#include <linux/netfilter.h>
101
6b96018b
AB
102#include <linux/if_tun.h>
103#include <linux/ipv6_route.h>
104#include <linux/route.h>
6b96018b
AB
105#include <linux/sockios.h>
106#include <linux/atalk.h>
107
1da177e4 108static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
027445c3
BP
109static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
110 unsigned long nr_segs, loff_t pos);
111static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
112 unsigned long nr_segs, loff_t pos);
89bddce5 113static int sock_mmap(struct file *file, struct vm_area_struct *vma);
1da177e4
LT
114
115static int sock_close(struct inode *inode, struct file *file);
116static unsigned int sock_poll(struct file *file,
117 struct poll_table_struct *wait);
89bddce5 118static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
89bbfc95
SP
119#ifdef CONFIG_COMPAT
120static long compat_sock_ioctl(struct file *file,
89bddce5 121 unsigned int cmd, unsigned long arg);
89bbfc95 122#endif
1da177e4 123static int sock_fasync(int fd, struct file *filp, int on);
1da177e4
LT
124static ssize_t sock_sendpage(struct file *file, struct page *page,
125 int offset, size_t size, loff_t *ppos, int more);
9c55e01c 126static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 127 struct pipe_inode_info *pipe, size_t len,
9c55e01c 128 unsigned int flags);
1da177e4 129
1da177e4
LT
130/*
131 * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
132 * in the operation structures but are done directly via the socketcall() multiplexor.
133 */
134
da7071d7 135static const struct file_operations socket_file_ops = {
1da177e4
LT
136 .owner = THIS_MODULE,
137 .llseek = no_llseek,
138 .aio_read = sock_aio_read,
139 .aio_write = sock_aio_write,
140 .poll = sock_poll,
141 .unlocked_ioctl = sock_ioctl,
89bbfc95
SP
142#ifdef CONFIG_COMPAT
143 .compat_ioctl = compat_sock_ioctl,
144#endif
1da177e4
LT
145 .mmap = sock_mmap,
146 .open = sock_no_open, /* special open code to disallow open via /proc */
147 .release = sock_close,
148 .fasync = sock_fasync,
5274f052
JA
149 .sendpage = sock_sendpage,
150 .splice_write = generic_splice_sendpage,
9c55e01c 151 .splice_read = sock_splice_read,
1da177e4
LT
152};
153
154/*
155 * The protocol list. Each protocol is registered in here.
156 */
157
1da177e4 158static DEFINE_SPINLOCK(net_family_lock);
f0fd27d4 159static const struct net_proto_family *net_families[NPROTO] __read_mostly;
1da177e4 160
1da177e4
LT
161/*
162 * Statistics counters of the socket lists
163 */
164
c6d409cf 165static DEFINE_PER_CPU(int, sockets_in_use);
1da177e4
LT
166
/*
 * Support routines.
 * Move socket addresses back and forth across the kernel/user
 * divide and look after the messy bits.
 */

/*
 * Size budget for a socket address, in bytes:
 *	108 for Unix domain,
 *	 16 for IP, 16 for IPX,
 *	 24 for IPv6,
 *	about 80 for AX.25.
 * It must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
89bddce5 181
1da177e4
LT
182/**
183 * move_addr_to_kernel - copy a socket address into kernel space
184 * @uaddr: Address in user space
185 * @kaddr: Address in kernel space
186 * @ulen: Length in user space
187 *
188 * The address is copied into kernel space. If the provided address is
189 * too long an error code of -EINVAL is returned. If the copy gives
190 * invalid addresses -EFAULT is returned. On a success 0 is returned.
191 */
192
230b1839 193int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
1da177e4 194{
230b1839 195 if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
1da177e4 196 return -EINVAL;
89bddce5 197 if (ulen == 0)
1da177e4 198 return 0;
89bddce5 199 if (copy_from_user(kaddr, uaddr, ulen))
1da177e4 200 return -EFAULT;
3ec3b2fb 201 return audit_sockaddr(ulen, kaddr);
1da177e4
LT
202}
203
204/**
205 * move_addr_to_user - copy an address to user space
206 * @kaddr: kernel space address
207 * @klen: length of address in kernel
208 * @uaddr: user space address
209 * @ulen: pointer to user length field
210 *
211 * The value pointed to by ulen on entry is the buffer length available.
212 * This is overwritten with the buffer space used. -EINVAL is returned
213 * if an overlong buffer is specified or a negative buffer size. -EFAULT
214 * is returned if either the buffer or the length field are not
215 * accessible.
216 * After copying the data up to the limit the user specifies, the true
217 * length of the data is written over the length limit the user
218 * specified. Zero is returned for a success.
219 */
89bddce5 220
230b1839 221int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
89bddce5 222 int __user *ulen)
1da177e4
LT
223{
224 int err;
225 int len;
226
89bddce5
SH
227 err = get_user(len, ulen);
228 if (err)
1da177e4 229 return err;
89bddce5
SH
230 if (len > klen)
231 len = klen;
230b1839 232 if (len < 0 || len > sizeof(struct sockaddr_storage))
1da177e4 233 return -EINVAL;
89bddce5 234 if (len) {
d6fe3945
SG
235 if (audit_sockaddr(klen, kaddr))
236 return -ENOMEM;
89bddce5 237 if (copy_to_user(uaddr, kaddr, len))
1da177e4
LT
238 return -EFAULT;
239 }
240 /*
89bddce5
SH
241 * "fromlen shall refer to the value before truncation.."
242 * 1003.1g
1da177e4
LT
243 */
244 return __put_user(klen, ulen);
245}
246
e18b890b 247static struct kmem_cache *sock_inode_cachep __read_mostly;
1da177e4
LT
248
249static struct inode *sock_alloc_inode(struct super_block *sb)
250{
251 struct socket_alloc *ei;
89bddce5 252
e94b1766 253 ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
1da177e4
LT
254 if (!ei)
255 return NULL;
43815482
ED
256 ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL);
257 if (!ei->socket.wq) {
258 kmem_cache_free(sock_inode_cachep, ei);
259 return NULL;
260 }
261 init_waitqueue_head(&ei->socket.wq->wait);
262 ei->socket.wq->fasync_list = NULL;
89bddce5 263
1da177e4
LT
264 ei->socket.state = SS_UNCONNECTED;
265 ei->socket.flags = 0;
266 ei->socket.ops = NULL;
267 ei->socket.sk = NULL;
268 ei->socket.file = NULL;
1da177e4
LT
269
270 return &ei->vfs_inode;
271}
272
43815482
ED
273
274static void wq_free_rcu(struct rcu_head *head)
275{
276 struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
277
278 kfree(wq);
279}
280
1da177e4
LT
281static void sock_destroy_inode(struct inode *inode)
282{
43815482
ED
283 struct socket_alloc *ei;
284
285 ei = container_of(inode, struct socket_alloc, vfs_inode);
286 call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
287 kmem_cache_free(sock_inode_cachep, ei);
1da177e4
LT
288}
289
51cc5068 290static void init_once(void *foo)
1da177e4 291{
89bddce5 292 struct socket_alloc *ei = (struct socket_alloc *)foo;
1da177e4 293
a35afb83 294 inode_init_once(&ei->vfs_inode);
1da177e4 295}
89bddce5 296
1da177e4
LT
297static int init_inodecache(void)
298{
299 sock_inode_cachep = kmem_cache_create("sock_inode_cache",
89bddce5
SH
300 sizeof(struct socket_alloc),
301 0,
302 (SLAB_HWCACHE_ALIGN |
303 SLAB_RECLAIM_ACCOUNT |
304 SLAB_MEM_SPREAD),
20c2df83 305 init_once);
1da177e4
LT
306 if (sock_inode_cachep == NULL)
307 return -ENOMEM;
308 return 0;
309}
310
b87221de 311static const struct super_operations sockfs_ops = {
c6d409cf
ED
312 .alloc_inode = sock_alloc_inode,
313 .destroy_inode = sock_destroy_inode,
314 .statfs = simple_statfs,
1da177e4
LT
315};
316
454e2398 317static int sockfs_get_sb(struct file_system_type *fs_type,
89bddce5
SH
318 int flags, const char *dev_name, void *data,
319 struct vfsmount *mnt)
1da177e4 320{
454e2398
DH
321 return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
322 mnt);
1da177e4
LT
323}
324
ba89966c 325static struct vfsmount *sock_mnt __read_mostly;
1da177e4
LT
326
327static struct file_system_type sock_fs_type = {
328 .name = "sockfs",
329 .get_sb = sockfs_get_sb,
330 .kill_sb = kill_anon_super,
331};
89bddce5 332
c23fbb6b
ED
333/*
334 * sockfs_dname() is called from d_path().
335 */
336static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
337{
338 return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
339 dentry->d_inode->i_ino);
340}
341
3ba13d17 342static const struct dentry_operations sockfs_dentry_operations = {
c23fbb6b 343 .d_dname = sockfs_dname,
1da177e4
LT
344};
345
346/*
347 * Obtains the first available file descriptor and sets it up for use.
348 *
39d8c1b6
DM
349 * These functions create file structures and maps them to fd space
350 * of the current process. On success it returns file descriptor
1da177e4
LT
351 * and file struct implicitly stored in sock->file.
352 * Note that another thread may close file descriptor before we return
353 * from this function. We use the fact that now we do not refer
354 * to socket after mapping. If one day we will need it, this
355 * function will increment ref. count on file by 1.
356 *
357 * In any case returned fd MAY BE not valid!
358 * This race condition is unavoidable
359 * with shared fd spaces, we cannot solve it inside kernel,
360 * but we take care of internal coherence yet.
361 */
362
7cbe66b6 363static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
1da177e4 364{
7cbe66b6 365 struct qstr name = { .name = "" };
2c48b9c4 366 struct path path;
7cbe66b6 367 struct file *file;
1da177e4 368 int fd;
1da177e4 369
a677a039 370 fd = get_unused_fd_flags(flags);
7cbe66b6
AV
371 if (unlikely(fd < 0))
372 return fd;
1da177e4 373
2c48b9c4
AV
374 path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
375 if (unlikely(!path.dentry)) {
7cbe66b6 376 put_unused_fd(fd);
39d8c1b6 377 return -ENOMEM;
7cbe66b6 378 }
2c48b9c4 379 path.mnt = mntget(sock_mnt);
39d8c1b6 380
2c48b9c4 381 path.dentry->d_op = &sockfs_dentry_operations;
2c48b9c4 382 d_instantiate(path.dentry, SOCK_INODE(sock));
cc3808f8 383 SOCK_INODE(sock)->i_fop = &socket_file_ops;
39d8c1b6 384
2c48b9c4 385 file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
ce8d2cdf 386 &socket_file_ops);
cc3808f8
AV
387 if (unlikely(!file)) {
388 /* drop dentry, keep inode */
389 atomic_inc(&path.dentry->d_inode->i_count);
2c48b9c4 390 path_put(&path);
cc3808f8
AV
391 put_unused_fd(fd);
392 return -ENFILE;
393 }
394
395 sock->file = file;
77d27200 396 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
39d8c1b6
DM
397 file->f_pos = 0;
398 file->private_data = sock;
1da177e4 399
7cbe66b6
AV
400 *f = file;
401 return fd;
39d8c1b6
DM
402}
403
/*
 * Allocate a file for @sock via sock_alloc_file() and, on success, install
 * it in the descriptor table.  Returns the new fd or a negative errno.
 */
int sock_map_fd(struct socket *sock, int flags)
{
	struct file *newfile;
	int fd;

	fd = sock_alloc_file(sock, &newfile, flags);
	if (unlikely(fd < 0))
		return fd;

	fd_install(fd, newfile);
	return fd;
}
EXPORT_SYMBOL(sock_map_fd);
1da177e4 415
6cb153ca
BL
416static struct socket *sock_from_file(struct file *file, int *err)
417{
6cb153ca
BL
418 if (file->f_op == &socket_file_ops)
419 return file->private_data; /* set in sock_map_fd */
420
23bb80d2
ED
421 *err = -ENOTSOCK;
422 return NULL;
6cb153ca
BL
423}
424
1da177e4 425/**
c6d409cf 426 * sockfd_lookup - Go from a file number to its socket slot
1da177e4
LT
427 * @fd: file handle
428 * @err: pointer to an error code return
429 *
430 * The file handle passed in is locked and the socket it is bound
431 * too is returned. If an error occurs the err pointer is overwritten
432 * with a negative errno code and NULL is returned. The function checks
433 * for both invalid handles and passing a handle which is not a socket.
434 *
435 * On a success the socket object pointer is returned.
436 */
437
438struct socket *sockfd_lookup(int fd, int *err)
439{
440 struct file *file;
1da177e4
LT
441 struct socket *sock;
442
89bddce5
SH
443 file = fget(fd);
444 if (!file) {
1da177e4
LT
445 *err = -EBADF;
446 return NULL;
447 }
89bddce5 448
6cb153ca
BL
449 sock = sock_from_file(file, err);
450 if (!sock)
1da177e4 451 fput(file);
6cb153ca
BL
452 return sock;
453}
c6d409cf 454EXPORT_SYMBOL(sockfd_lookup);
1da177e4 455
6cb153ca
BL
456static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
457{
458 struct file *file;
459 struct socket *sock;
460
3672558c 461 *err = -EBADF;
6cb153ca
BL
462 file = fget_light(fd, fput_needed);
463 if (file) {
464 sock = sock_from_file(file, err);
465 if (sock)
466 return sock;
467 fput_light(file, *fput_needed);
1da177e4 468 }
6cb153ca 469 return NULL;
1da177e4
LT
470}
471
472/**
473 * sock_alloc - allocate a socket
89bddce5 474 *
1da177e4
LT
475 * Allocate a new inode and socket object. The two are bound together
476 * and initialised. The socket is then returned. If we are out of inodes
477 * NULL is returned.
478 */
479
480static struct socket *sock_alloc(void)
481{
89bddce5
SH
482 struct inode *inode;
483 struct socket *sock;
1da177e4
LT
484
485 inode = new_inode(sock_mnt->mnt_sb);
486 if (!inode)
487 return NULL;
488
489 sock = SOCKET_I(inode);
490
29a020d3 491 kmemcheck_annotate_bitfield(sock, type);
89bddce5 492 inode->i_mode = S_IFSOCK | S_IRWXUGO;
8192b0c4
DH
493 inode->i_uid = current_fsuid();
494 inode->i_gid = current_fsgid();
1da177e4 495
4e69489a 496 percpu_add(sockets_in_use, 1);
1da177e4
LT
497 return sock;
498}
499
500/*
501 * In theory you can't get an open on this inode, but /proc provides
502 * a back door. Remember to keep it shut otherwise you'll let the
503 * creepy crawlies in.
504 */
89bddce5 505
1da177e4
LT
506static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
507{
508 return -ENXIO;
509}
510
4b6f5d20 511const struct file_operations bad_sock_fops = {
1da177e4
LT
512 .owner = THIS_MODULE,
513 .open = sock_no_open,
514};
515
516/**
517 * sock_release - close a socket
518 * @sock: socket to close
519 *
520 * The socket is released from the protocol stack if it has a release
521 * callback, and the inode is then released if the socket is bound to
89bddce5 522 * an inode not a file.
1da177e4 523 */
89bddce5 524
1da177e4
LT
525void sock_release(struct socket *sock)
526{
527 if (sock->ops) {
528 struct module *owner = sock->ops->owner;
529
530 sock->ops->release(sock);
531 sock->ops = NULL;
532 module_put(owner);
533 }
534
43815482 535 if (sock->wq->fasync_list)
1da177e4
LT
536 printk(KERN_ERR "sock_release: fasync list not empty!\n");
537
4e69489a 538 percpu_sub(sockets_in_use, 1);
1da177e4
LT
539 if (!sock->file) {
540 iput(SOCK_INODE(sock));
541 return;
542 }
89bddce5 543 sock->file = NULL;
1da177e4 544}
c6d409cf 545EXPORT_SYMBOL(sock_release);
1da177e4 546
20d49473
PO
547int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
548 union skb_shared_tx *shtx)
549{
550 shtx->flags = 0;
551 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
552 shtx->hardware = 1;
553 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
554 shtx->software = 1;
555 return 0;
556}
557EXPORT_SYMBOL(sock_tx_timestamp);
558
89bddce5 559static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
1da177e4
LT
560 struct msghdr *msg, size_t size)
561{
562 struct sock_iocb *si = kiocb_to_siocb(iocb);
563 int err;
564
f8451725
HX
565 sock_update_classid(sock->sk);
566
1da177e4
LT
567 si->sock = sock;
568 si->scm = NULL;
569 si->msg = msg;
570 si->size = size;
571
572 err = security_socket_sendmsg(sock, msg, size);
573 if (err)
574 return err;
575
576 return sock->ops->sendmsg(iocb, sock, msg, size);
577}
578
579int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
580{
581 struct kiocb iocb;
582 struct sock_iocb siocb;
583 int ret;
584
585 init_sync_kiocb(&iocb, NULL);
586 iocb.private = &siocb;
587 ret = __sock_sendmsg(&iocb, sock, msg, size);
588 if (-EIOCBQUEUED == ret)
589 ret = wait_on_sync_kiocb(&iocb);
590 return ret;
591}
c6d409cf 592EXPORT_SYMBOL(sock_sendmsg);
1da177e4
LT
593
594int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
595 struct kvec *vec, size_t num, size_t size)
596{
597 mm_segment_t oldfs = get_fs();
598 int result;
599
600 set_fs(KERNEL_DS);
601 /*
602 * the following is safe, since for compiler definitions of kvec and
603 * iovec are identical, yielding the same in-core layout and alignment
604 */
89bddce5 605 msg->msg_iov = (struct iovec *)vec;
1da177e4
LT
606 msg->msg_iovlen = num;
607 result = sock_sendmsg(sock, msg, size);
608 set_fs(oldfs);
609 return result;
610}
c6d409cf 611EXPORT_SYMBOL(kernel_sendmsg);
1da177e4 612
20d49473
PO
613static int ktime2ts(ktime_t kt, struct timespec *ts)
614{
615 if (kt.tv64) {
616 *ts = ktime_to_timespec(kt);
617 return 1;
618 } else {
619 return 0;
620 }
621}
622
92f37fd2
ED
623/*
624 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
625 */
626void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
627 struct sk_buff *skb)
628{
20d49473
PO
629 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
630 struct timespec ts[3];
631 int empty = 1;
632 struct skb_shared_hwtstamps *shhwtstamps =
633 skb_hwtstamps(skb);
634
635 /* Race occurred between timestamp enabling and packet
636 receiving. Fill in the current time for now. */
637 if (need_software_tstamp && skb->tstamp.tv64 == 0)
638 __net_timestamp(skb);
639
640 if (need_software_tstamp) {
641 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
642 struct timeval tv;
643 skb_get_timestamp(skb, &tv);
644 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
645 sizeof(tv), &tv);
646 } else {
842509b8 647 skb_get_timestampns(skb, &ts[0]);
20d49473 648 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
842509b8 649 sizeof(ts[0]), &ts[0]);
20d49473
PO
650 }
651 }
652
653
654 memset(ts, 0, sizeof(ts));
655 if (skb->tstamp.tv64 &&
656 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
657 skb_get_timestampns(skb, ts + 0);
658 empty = 0;
659 }
660 if (shhwtstamps) {
661 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
662 ktime2ts(shhwtstamps->syststamp, ts + 1))
663 empty = 0;
664 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
665 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
666 empty = 0;
92f37fd2 667 }
20d49473
PO
668 if (!empty)
669 put_cmsg(msg, SOL_SOCKET,
670 SCM_TIMESTAMPING, sizeof(ts), &ts);
92f37fd2 671}
7c81fd8b
ACM
672EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
673
3b885787
NH
674inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
675{
676 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
677 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
678 sizeof(__u32), &skb->dropcount);
679}
680
/*
 * Attach both the receive timestamp and the drop counter control messages
 * for @skb to @msg, in that order.
 */
void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
			      struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb);
	sock_recv_drops(msg, sk, skb);
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
3b885787 688
a2e27255
ACM
689static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
690 struct msghdr *msg, size_t size, int flags)
1da177e4 691{
1da177e4
LT
692 struct sock_iocb *si = kiocb_to_siocb(iocb);
693
f8451725
HX
694 sock_update_classid(sock->sk);
695
1da177e4
LT
696 si->sock = sock;
697 si->scm = NULL;
698 si->msg = msg;
699 si->size = size;
700 si->flags = flags;
701
1da177e4
LT
702 return sock->ops->recvmsg(iocb, sock, msg, size, flags);
703}
704
a2e27255
ACM
705static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
706 struct msghdr *msg, size_t size, int flags)
707{
708 int err = security_socket_recvmsg(sock, msg, size, flags);
709
710 return err ?: __sock_recvmsg_nosec(iocb, sock, msg, size, flags);
711}
712
89bddce5 713int sock_recvmsg(struct socket *sock, struct msghdr *msg,
1da177e4
LT
714 size_t size, int flags)
715{
716 struct kiocb iocb;
717 struct sock_iocb siocb;
718 int ret;
719
89bddce5 720 init_sync_kiocb(&iocb, NULL);
1da177e4
LT
721 iocb.private = &siocb;
722 ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
723 if (-EIOCBQUEUED == ret)
724 ret = wait_on_sync_kiocb(&iocb);
725 return ret;
726}
c6d409cf 727EXPORT_SYMBOL(sock_recvmsg);
1da177e4 728
a2e27255
ACM
729static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
730 size_t size, int flags)
731{
732 struct kiocb iocb;
733 struct sock_iocb siocb;
734 int ret;
735
736 init_sync_kiocb(&iocb, NULL);
737 iocb.private = &siocb;
738 ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags);
739 if (-EIOCBQUEUED == ret)
740 ret = wait_on_sync_kiocb(&iocb);
741 return ret;
742}
743
89bddce5
SH
744int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
745 struct kvec *vec, size_t num, size_t size, int flags)
1da177e4
LT
746{
747 mm_segment_t oldfs = get_fs();
748 int result;
749
750 set_fs(KERNEL_DS);
751 /*
752 * the following is safe, since for compiler definitions of kvec and
753 * iovec are identical, yielding the same in-core layout and alignment
754 */
89bddce5 755 msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
1da177e4
LT
756 result = sock_recvmsg(sock, msg, size, flags);
757 set_fs(oldfs);
758 return result;
759}
c6d409cf 760EXPORT_SYMBOL(kernel_recvmsg);
1da177e4
LT
761
762static void sock_aio_dtor(struct kiocb *iocb)
763{
764 kfree(iocb->private);
765}
766
ce1d4d3e
CH
767static ssize_t sock_sendpage(struct file *file, struct page *page,
768 int offset, size_t size, loff_t *ppos, int more)
1da177e4 769{
1da177e4
LT
770 struct socket *sock;
771 int flags;
772
ce1d4d3e
CH
773 sock = file->private_data;
774
775 flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
776 if (more)
777 flags |= MSG_MORE;
778
e6949583 779 return kernel_sendpage(sock, page, offset, size, flags);
ce1d4d3e 780}
1da177e4 781
9c55e01c 782static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
c6d409cf 783 struct pipe_inode_info *pipe, size_t len,
9c55e01c
JA
784 unsigned int flags)
785{
786 struct socket *sock = file->private_data;
787
997b37da
RDC
788 if (unlikely(!sock->ops->splice_read))
789 return -EINVAL;
790
f8451725
HX
791 sock_update_classid(sock->sk);
792
9c55e01c
JA
793 return sock->ops->splice_read(sock, ppos, pipe, len, flags);
794}
795
ce1d4d3e 796static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
89bddce5 797 struct sock_iocb *siocb)
ce1d4d3e
CH
798{
799 if (!is_sync_kiocb(iocb)) {
800 siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
801 if (!siocb)
802 return NULL;
1da177e4
LT
803 iocb->ki_dtor = sock_aio_dtor;
804 }
1da177e4 805
ce1d4d3e 806 siocb->kiocb = iocb;
ce1d4d3e
CH
807 iocb->private = siocb;
808 return siocb;
1da177e4
LT
809}
810
ce1d4d3e 811static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
812 struct file *file, const struct iovec *iov,
813 unsigned long nr_segs)
ce1d4d3e
CH
814{
815 struct socket *sock = file->private_data;
816 size_t size = 0;
817 int i;
1da177e4 818
89bddce5
SH
819 for (i = 0; i < nr_segs; i++)
820 size += iov[i].iov_len;
1da177e4 821
ce1d4d3e
CH
822 msg->msg_name = NULL;
823 msg->msg_namelen = 0;
824 msg->msg_control = NULL;
825 msg->msg_controllen = 0;
89bddce5 826 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
827 msg->msg_iovlen = nr_segs;
828 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
829
830 return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
831}
832
027445c3
BP
833static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
834 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
835{
836 struct sock_iocb siocb, *x;
837
1da177e4
LT
838 if (pos != 0)
839 return -ESPIPE;
027445c3
BP
840
841 if (iocb->ki_left == 0) /* Match SYS5 behaviour */
1da177e4
LT
842 return 0;
843
027445c3
BP
844
845 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
846 if (!x)
847 return -ENOMEM;
027445c3 848 return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
849}
850
ce1d4d3e 851static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
027445c3
BP
852 struct file *file, const struct iovec *iov,
853 unsigned long nr_segs)
1da177e4 854{
ce1d4d3e
CH
855 struct socket *sock = file->private_data;
856 size_t size = 0;
857 int i;
1da177e4 858
89bddce5
SH
859 for (i = 0; i < nr_segs; i++)
860 size += iov[i].iov_len;
1da177e4 861
ce1d4d3e
CH
862 msg->msg_name = NULL;
863 msg->msg_namelen = 0;
864 msg->msg_control = NULL;
865 msg->msg_controllen = 0;
89bddce5 866 msg->msg_iov = (struct iovec *)iov;
ce1d4d3e
CH
867 msg->msg_iovlen = nr_segs;
868 msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
869 if (sock->type == SOCK_SEQPACKET)
870 msg->msg_flags |= MSG_EOR;
1da177e4 871
ce1d4d3e 872 return __sock_sendmsg(iocb, sock, msg, size);
1da177e4
LT
873}
874
027445c3
BP
875static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
876 unsigned long nr_segs, loff_t pos)
ce1d4d3e
CH
877{
878 struct sock_iocb siocb, *x;
1da177e4 879
ce1d4d3e
CH
880 if (pos != 0)
881 return -ESPIPE;
027445c3 882
027445c3 883 x = alloc_sock_iocb(iocb, &siocb);
ce1d4d3e
CH
884 if (!x)
885 return -ENOMEM;
1da177e4 886
027445c3 887 return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
1da177e4
LT
888}
889
1da177e4
LT
890/*
891 * Atomic setting of ioctl hooks to avoid race
892 * with module unload.
893 */
894
4a3e2f71 895static DEFINE_MUTEX(br_ioctl_mutex);
c6d409cf 896static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg);
1da177e4 897
881d966b 898void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
1da177e4 899{
4a3e2f71 900 mutex_lock(&br_ioctl_mutex);
1da177e4 901 br_ioctl_hook = hook;
4a3e2f71 902 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
903}
904EXPORT_SYMBOL(brioctl_set);
905
4a3e2f71 906static DEFINE_MUTEX(vlan_ioctl_mutex);
881d966b 907static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
1da177e4 908
881d966b 909void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
1da177e4 910{
4a3e2f71 911 mutex_lock(&vlan_ioctl_mutex);
1da177e4 912 vlan_ioctl_hook = hook;
4a3e2f71 913 mutex_unlock(&vlan_ioctl_mutex);
1da177e4
LT
914}
915EXPORT_SYMBOL(vlan_ioctl_set);
916
4a3e2f71 917static DEFINE_MUTEX(dlci_ioctl_mutex);
89bddce5 918static int (*dlci_ioctl_hook) (unsigned int, void __user *);
1da177e4 919
89bddce5 920void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
1da177e4 921{
4a3e2f71 922 mutex_lock(&dlci_ioctl_mutex);
1da177e4 923 dlci_ioctl_hook = hook;
4a3e2f71 924 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
925}
926EXPORT_SYMBOL(dlci_ioctl_set);
927
6b96018b
AB
928static long sock_do_ioctl(struct net *net, struct socket *sock,
929 unsigned int cmd, unsigned long arg)
930{
931 int err;
932 void __user *argp = (void __user *)arg;
933
934 err = sock->ops->ioctl(sock, cmd, arg);
935
936 /*
937 * If this ioctl is unknown try to hand it down
938 * to the NIC driver.
939 */
940 if (err == -ENOIOCTLCMD)
941 err = dev_ioctl(net, cmd, argp);
942
943 return err;
944}
945
1da177e4
LT
946/*
947 * With an ioctl, arg may well be a user mode pointer, but we don't know
948 * what to do with it - that's up to the protocol still.
949 */
950
951static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
952{
953 struct socket *sock;
881d966b 954 struct sock *sk;
1da177e4
LT
955 void __user *argp = (void __user *)arg;
956 int pid, err;
881d966b 957 struct net *net;
1da177e4 958
b69aee04 959 sock = file->private_data;
881d966b 960 sk = sock->sk;
3b1e0a65 961 net = sock_net(sk);
1da177e4 962 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
881d966b 963 err = dev_ioctl(net, cmd, argp);
1da177e4 964 } else
3d23e349 965#ifdef CONFIG_WEXT_CORE
1da177e4 966 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
881d966b 967 err = dev_ioctl(net, cmd, argp);
1da177e4 968 } else
3d23e349 969#endif
89bddce5 970 switch (cmd) {
1da177e4
LT
971 case FIOSETOWN:
972 case SIOCSPGRP:
973 err = -EFAULT;
974 if (get_user(pid, (int __user *)argp))
975 break;
976 err = f_setown(sock->file, pid, 1);
977 break;
978 case FIOGETOWN:
979 case SIOCGPGRP:
609d7fa9 980 err = put_user(f_getown(sock->file),
89bddce5 981 (int __user *)argp);
1da177e4
LT
982 break;
983 case SIOCGIFBR:
984 case SIOCSIFBR:
985 case SIOCBRADDBR:
986 case SIOCBRDELBR:
987 err = -ENOPKG;
988 if (!br_ioctl_hook)
989 request_module("bridge");
990
4a3e2f71 991 mutex_lock(&br_ioctl_mutex);
89bddce5 992 if (br_ioctl_hook)
881d966b 993 err = br_ioctl_hook(net, cmd, argp);
4a3e2f71 994 mutex_unlock(&br_ioctl_mutex);
1da177e4
LT
995 break;
996 case SIOCGIFVLAN:
997 case SIOCSIFVLAN:
998 err = -ENOPKG;
999 if (!vlan_ioctl_hook)
1000 request_module("8021q");
1001
4a3e2f71 1002 mutex_lock(&vlan_ioctl_mutex);
1da177e4 1003 if (vlan_ioctl_hook)
881d966b 1004 err = vlan_ioctl_hook(net, argp);
4a3e2f71 1005 mutex_unlock(&vlan_ioctl_mutex);
1da177e4 1006 break;
1da177e4
LT
1007 case SIOCADDDLCI:
1008 case SIOCDELDLCI:
1009 err = -ENOPKG;
1010 if (!dlci_ioctl_hook)
1011 request_module("dlci");
1012
7512cbf6
PE
1013 mutex_lock(&dlci_ioctl_mutex);
1014 if (dlci_ioctl_hook)
1da177e4 1015 err = dlci_ioctl_hook(cmd, argp);
7512cbf6 1016 mutex_unlock(&dlci_ioctl_mutex);
1da177e4
LT
1017 break;
1018 default:
6b96018b 1019 err = sock_do_ioctl(net, sock, cmd, arg);
1da177e4 1020 break;
89bddce5 1021 }
1da177e4
LT
1022 return err;
1023}
1024
1025int sock_create_lite(int family, int type, int protocol, struct socket **res)
1026{
1027 int err;
1028 struct socket *sock = NULL;
89bddce5 1029
1da177e4
LT
1030 err = security_socket_create(family, type, protocol, 1);
1031 if (err)
1032 goto out;
1033
1034 sock = sock_alloc();
1035 if (!sock) {
1036 err = -ENOMEM;
1037 goto out;
1038 }
1039
1da177e4 1040 sock->type = type;
7420ed23
VY
1041 err = security_socket_post_create(sock, family, type, protocol, 1);
1042 if (err)
1043 goto out_release;
1044
1da177e4
LT
1045out:
1046 *res = sock;
1047 return err;
7420ed23
VY
1048out_release:
1049 sock_release(sock);
1050 sock = NULL;
1051 goto out;
1da177e4 1052}
c6d409cf 1053EXPORT_SYMBOL(sock_create_lite);
1da177e4
LT
1054
1055/* No kernel lock held - perfect */
89bddce5 1056static unsigned int sock_poll(struct file *file, poll_table *wait)
1da177e4
LT
1057{
1058 struct socket *sock;
1059
1060 /*
89bddce5 1061 * We can't return errors to poll, so it's either yes or no.
1da177e4 1062 */
b69aee04 1063 sock = file->private_data;
1da177e4
LT
1064 return sock->ops->poll(file, sock, wait);
1065}
1066
89bddce5 1067static int sock_mmap(struct file *file, struct vm_area_struct *vma)
1da177e4 1068{
b69aee04 1069 struct socket *sock = file->private_data;
1da177e4
LT
1070
1071 return sock->ops->mmap(file, sock, vma);
1072}
1073
20380731 1074static int sock_close(struct inode *inode, struct file *filp)
1da177e4
LT
1075{
1076 /*
89bddce5
SH
1077 * It was possible the inode is NULL we were
1078 * closing an unfinished socket.
1da177e4
LT
1079 */
1080
89bddce5 1081 if (!inode) {
1da177e4
LT
1082 printk(KERN_DEBUG "sock_close: NULL inode\n");
1083 return 0;
1084 }
1da177e4
LT
1085 sock_release(SOCKET_I(inode));
1086 return 0;
1087}
1088
1089/*
1090 * Update the socket async list
1091 *
1092 * Fasync_list locking strategy.
1093 *
1094 * 1. fasync_list is modified only under process context socket lock
1095 * i.e. under semaphore.
1096 * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
989a2979 1097 * or under socket lock
1da177e4
LT
1098 */
1099
1100static int sock_fasync(int fd, struct file *filp, int on)
1101{
989a2979
ED
1102 struct socket *sock = filp->private_data;
1103 struct sock *sk = sock->sk;
1da177e4 1104
989a2979 1105 if (sk == NULL)
1da177e4 1106 return -EINVAL;
1da177e4
LT
1107
1108 lock_sock(sk);
1109
43815482 1110 fasync_helper(fd, filp, on, &sock->wq->fasync_list);
1da177e4 1111
43815482 1112 if (!sock->wq->fasync_list)
989a2979
ED
1113 sock_reset_flag(sk, SOCK_FASYNC);
1114 else
bcdce719 1115 sock_set_flag(sk, SOCK_FASYNC);
1da177e4 1116
989a2979 1117 release_sock(sk);
1da177e4
LT
1118 return 0;
1119}
1120
43815482 1121/* This function may be called only under socket lock or callback_lock or rcu_lock */
1da177e4
LT
1122
1123int sock_wake_async(struct socket *sock, int how, int band)
1124{
43815482
ED
1125 struct socket_wq *wq;
1126
1127 if (!sock)
1128 return -1;
1129 rcu_read_lock();
1130 wq = rcu_dereference(sock->wq);
1131 if (!wq || !wq->fasync_list) {
1132 rcu_read_unlock();
1da177e4 1133 return -1;
43815482 1134 }
89bddce5 1135 switch (how) {
8d8ad9d7 1136 case SOCK_WAKE_WAITD:
1da177e4
LT
1137 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1138 break;
1139 goto call_kill;
8d8ad9d7 1140 case SOCK_WAKE_SPACE:
1da177e4
LT
1141 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1142 break;
1143 /* fall through */
8d8ad9d7 1144 case SOCK_WAKE_IO:
89bddce5 1145call_kill:
43815482 1146 kill_fasync(&wq->fasync_list, SIGIO, band);
1da177e4 1147 break;
8d8ad9d7 1148 case SOCK_WAKE_URG:
43815482 1149 kill_fasync(&wq->fasync_list, SIGURG, band);
1da177e4 1150 }
43815482 1151 rcu_read_unlock();
1da177e4
LT
1152 return 0;
1153}
c6d409cf 1154EXPORT_SYMBOL(sock_wake_async);
1da177e4 1155
1b8d7ae4 1156static int __sock_create(struct net *net, int family, int type, int protocol,
89bddce5 1157 struct socket **res, int kern)
1da177e4
LT
1158{
1159 int err;
1160 struct socket *sock;
55737fda 1161 const struct net_proto_family *pf;
1da177e4
LT
1162
1163 /*
89bddce5 1164 * Check protocol is in range
1da177e4
LT
1165 */
1166 if (family < 0 || family >= NPROTO)
1167 return -EAFNOSUPPORT;
1168 if (type < 0 || type >= SOCK_MAX)
1169 return -EINVAL;
1170
1171 /* Compatibility.
1172
1173 This uglymoron is moved from INET layer to here to avoid
1174 deadlock in module load.
1175 */
1176 if (family == PF_INET && type == SOCK_PACKET) {
89bddce5 1177 static int warned;
1da177e4
LT
1178 if (!warned) {
1179 warned = 1;
89bddce5
SH
1180 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1181 current->comm);
1da177e4
LT
1182 }
1183 family = PF_PACKET;
1184 }
1185
1186 err = security_socket_create(family, type, protocol, kern);
1187 if (err)
1188 return err;
89bddce5 1189
55737fda
SH
1190 /*
1191 * Allocate the socket and allow the family to set things up. if
1192 * the protocol is 0, the family is instructed to select an appropriate
1193 * default.
1194 */
1195 sock = sock_alloc();
1196 if (!sock) {
1197 if (net_ratelimit())
1198 printk(KERN_WARNING "socket: no more sockets\n");
1199 return -ENFILE; /* Not exactly a match, but its the
1200 closest posix thing */
1201 }
1202
1203 sock->type = type;
1204
95a5afca 1205#ifdef CONFIG_MODULES
89bddce5
SH
1206 /* Attempt to load a protocol module if the find failed.
1207 *
1208 * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1da177e4
LT
1209 * requested real, full-featured networking support upon configuration.
1210 * Otherwise module support will break!
1211 */
55737fda 1212 if (net_families[family] == NULL)
89bddce5 1213 request_module("net-pf-%d", family);
1da177e4
LT
1214#endif
1215
55737fda
SH
1216 rcu_read_lock();
1217 pf = rcu_dereference(net_families[family]);
1218 err = -EAFNOSUPPORT;
1219 if (!pf)
1220 goto out_release;
1da177e4
LT
1221
1222 /*
1223 * We will call the ->create function, that possibly is in a loadable
1224 * module, so we have to bump that loadable module refcnt first.
1225 */
55737fda 1226 if (!try_module_get(pf->owner))
1da177e4
LT
1227 goto out_release;
1228
55737fda
SH
1229 /* Now protected by module ref count */
1230 rcu_read_unlock();
1231
3f378b68 1232 err = pf->create(net, sock, protocol, kern);
55737fda 1233 if (err < 0)
1da177e4 1234 goto out_module_put;
a79af59e 1235
1da177e4
LT
1236 /*
1237 * Now to bump the refcnt of the [loadable] module that owns this
1238 * socket at sock_release time we decrement its refcnt.
1239 */
55737fda
SH
1240 if (!try_module_get(sock->ops->owner))
1241 goto out_module_busy;
1242
1da177e4
LT
1243 /*
1244 * Now that we're done with the ->create function, the [loadable]
1245 * module can have its refcnt decremented
1246 */
55737fda 1247 module_put(pf->owner);
7420ed23
VY
1248 err = security_socket_post_create(sock, family, type, protocol, kern);
1249 if (err)
3b185525 1250 goto out_sock_release;
55737fda 1251 *res = sock;
1da177e4 1252
55737fda
SH
1253 return 0;
1254
1255out_module_busy:
1256 err = -EAFNOSUPPORT;
1da177e4 1257out_module_put:
55737fda
SH
1258 sock->ops = NULL;
1259 module_put(pf->owner);
1260out_sock_release:
1da177e4 1261 sock_release(sock);
55737fda
SH
1262 return err;
1263
1264out_release:
1265 rcu_read_unlock();
1266 goto out_sock_release;
1da177e4
LT
1267}
1268
1269int sock_create(int family, int type, int protocol, struct socket **res)
1270{
1b8d7ae4 1271 return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
1da177e4 1272}
c6d409cf 1273EXPORT_SYMBOL(sock_create);
1da177e4
LT
1274
1275int sock_create_kern(int family, int type, int protocol, struct socket **res)
1276{
1b8d7ae4 1277 return __sock_create(&init_net, family, type, protocol, res, 1);
1da177e4 1278}
c6d409cf 1279EXPORT_SYMBOL(sock_create_kern);
1da177e4 1280
3e0fa65f 1281SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
1da177e4
LT
1282{
1283 int retval;
1284 struct socket *sock;
a677a039
UD
1285 int flags;
1286
e38b36f3
UD
1287 /* Check the SOCK_* constants for consistency. */
1288 BUILD_BUG_ON(SOCK_CLOEXEC != O_CLOEXEC);
1289 BUILD_BUG_ON((SOCK_MAX | SOCK_TYPE_MASK) != SOCK_TYPE_MASK);
1290 BUILD_BUG_ON(SOCK_CLOEXEC & SOCK_TYPE_MASK);
1291 BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
1292
a677a039 1293 flags = type & ~SOCK_TYPE_MASK;
77d27200 1294 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1295 return -EINVAL;
1296 type &= SOCK_TYPE_MASK;
1da177e4 1297
aaca0bdc
UD
1298 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1299 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1300
1da177e4
LT
1301 retval = sock_create(family, type, protocol, &sock);
1302 if (retval < 0)
1303 goto out;
1304
77d27200 1305 retval = sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK));
1da177e4
LT
1306 if (retval < 0)
1307 goto out_release;
1308
1309out:
1310 /* It may be already another descriptor 8) Not kernel problem. */
1311 return retval;
1312
1313out_release:
1314 sock_release(sock);
1315 return retval;
1316}
1317
1318/*
1319 * Create a pair of connected sockets.
1320 */
1321
3e0fa65f
HC
1322SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
1323 int __user *, usockvec)
1da177e4
LT
1324{
1325 struct socket *sock1, *sock2;
1326 int fd1, fd2, err;
db349509 1327 struct file *newfile1, *newfile2;
a677a039
UD
1328 int flags;
1329
1330 flags = type & ~SOCK_TYPE_MASK;
77d27200 1331 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
a677a039
UD
1332 return -EINVAL;
1333 type &= SOCK_TYPE_MASK;
1da177e4 1334
aaca0bdc
UD
1335 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1336 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1337
1da177e4
LT
1338 /*
1339 * Obtain the first socket and check if the underlying protocol
1340 * supports the socketpair call.
1341 */
1342
1343 err = sock_create(family, type, protocol, &sock1);
1344 if (err < 0)
1345 goto out;
1346
1347 err = sock_create(family, type, protocol, &sock2);
1348 if (err < 0)
1349 goto out_release_1;
1350
1351 err = sock1->ops->socketpair(sock1, sock2);
89bddce5 1352 if (err < 0)
1da177e4
LT
1353 goto out_release_both;
1354
7cbe66b6 1355 fd1 = sock_alloc_file(sock1, &newfile1, flags);
bf3c23d1
DM
1356 if (unlikely(fd1 < 0)) {
1357 err = fd1;
db349509 1358 goto out_release_both;
bf3c23d1 1359 }
1da177e4 1360
7cbe66b6 1361 fd2 = sock_alloc_file(sock2, &newfile2, flags);
198de4d7
AV
1362 if (unlikely(fd2 < 0)) {
1363 err = fd2;
1364 fput(newfile1);
1365 put_unused_fd(fd1);
1366 sock_release(sock2);
1367 goto out;
db349509
AV
1368 }
1369
157cf649 1370 audit_fd_pair(fd1, fd2);
db349509
AV
1371 fd_install(fd1, newfile1);
1372 fd_install(fd2, newfile2);
1da177e4
LT
1373 /* fd1 and fd2 may be already another descriptors.
1374 * Not kernel problem.
1375 */
1376
89bddce5 1377 err = put_user(fd1, &usockvec[0]);
1da177e4
LT
1378 if (!err)
1379 err = put_user(fd2, &usockvec[1]);
1380 if (!err)
1381 return 0;
1382
1383 sys_close(fd2);
1384 sys_close(fd1);
1385 return err;
1386
1da177e4 1387out_release_both:
89bddce5 1388 sock_release(sock2);
1da177e4 1389out_release_1:
89bddce5 1390 sock_release(sock1);
1da177e4
LT
1391out:
1392 return err;
1393}
1394
1da177e4
LT
1395/*
1396 * Bind a name to a socket. Nothing much to do here since it's
1397 * the protocol's responsibility to handle the local address.
1398 *
1399 * We move the socket address to kernel space before we call
1400 * the protocol layer (having also checked the address is ok).
1401 */
1402
20f37034 1403SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
1da177e4
LT
1404{
1405 struct socket *sock;
230b1839 1406 struct sockaddr_storage address;
6cb153ca 1407 int err, fput_needed;
1da177e4 1408
89bddce5 1409 sock = sockfd_lookup_light(fd, &err, &fput_needed);
e71a4783 1410 if (sock) {
230b1839 1411 err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address);
89bddce5
SH
1412 if (err >= 0) {
1413 err = security_socket_bind(sock,
230b1839 1414 (struct sockaddr *)&address,
89bddce5 1415 addrlen);
6cb153ca
BL
1416 if (!err)
1417 err = sock->ops->bind(sock,
89bddce5 1418 (struct sockaddr *)
230b1839 1419 &address, addrlen);
1da177e4 1420 }
6cb153ca 1421 fput_light(sock->file, fput_needed);
89bddce5 1422 }
1da177e4
LT
1423 return err;
1424}
1425
1da177e4
LT
1426/*
1427 * Perform a listen. Basically, we allow the protocol to do anything
1428 * necessary for a listen, and if that works, we mark the socket as
1429 * ready for listening.
1430 */
1431
3e0fa65f 1432SYSCALL_DEFINE2(listen, int, fd, int, backlog)
1da177e4
LT
1433{
1434 struct socket *sock;
6cb153ca 1435 int err, fput_needed;
b8e1f9b5 1436 int somaxconn;
89bddce5
SH
1437
1438 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1439 if (sock) {
8efa6e93 1440 somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
b8e1f9b5
PE
1441 if ((unsigned)backlog > somaxconn)
1442 backlog = somaxconn;
1da177e4
LT
1443
1444 err = security_socket_listen(sock, backlog);
6cb153ca
BL
1445 if (!err)
1446 err = sock->ops->listen(sock, backlog);
1da177e4 1447
6cb153ca 1448 fput_light(sock->file, fput_needed);
1da177e4
LT
1449 }
1450 return err;
1451}
1452
1da177e4
LT
1453/*
1454 * For accept, we attempt to create a new socket, set up the link
1455 * with the client, wake up the client, then return the new
1456 * connected fd. We collect the address of the connector in kernel
1457 * space and move it to user at the very end. This is unclean because
1458 * we open the socket then return an error.
1459 *
1460 * 1003.1g adds the ability to recvmsg() to query connection pending
1461 * status to recvmsg. We need to add that support in a way thats
1462 * clean when we restucture accept also.
1463 */
1464
20f37034
HC
1465SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1466 int __user *, upeer_addrlen, int, flags)
1da177e4
LT
1467{
1468 struct socket *sock, *newsock;
39d8c1b6 1469 struct file *newfile;
6cb153ca 1470 int err, len, newfd, fput_needed;
230b1839 1471 struct sockaddr_storage address;
1da177e4 1472
77d27200 1473 if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
aaca0bdc
UD
1474 return -EINVAL;
1475
1476 if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
1477 flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
1478
6cb153ca 1479 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1480 if (!sock)
1481 goto out;
1482
1483 err = -ENFILE;
c6d409cf
ED
1484 newsock = sock_alloc();
1485 if (!newsock)
1da177e4
LT
1486 goto out_put;
1487
1488 newsock->type = sock->type;
1489 newsock->ops = sock->ops;
1490
1da177e4
LT
1491 /*
1492 * We don't need try_module_get here, as the listening socket (sock)
1493 * has the protocol module (sock->ops->owner) held.
1494 */
1495 __module_get(newsock->ops->owner);
1496
7cbe66b6 1497 newfd = sock_alloc_file(newsock, &newfile, flags);
39d8c1b6
DM
1498 if (unlikely(newfd < 0)) {
1499 err = newfd;
9a1875e6
DM
1500 sock_release(newsock);
1501 goto out_put;
39d8c1b6
DM
1502 }
1503
a79af59e
FF
1504 err = security_socket_accept(sock, newsock);
1505 if (err)
39d8c1b6 1506 goto out_fd;
a79af59e 1507
1da177e4
LT
1508 err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1509 if (err < 0)
39d8c1b6 1510 goto out_fd;
1da177e4
LT
1511
1512 if (upeer_sockaddr) {
230b1839 1513 if (newsock->ops->getname(newsock, (struct sockaddr *)&address,
89bddce5 1514 &len, 2) < 0) {
1da177e4 1515 err = -ECONNABORTED;
39d8c1b6 1516 goto out_fd;
1da177e4 1517 }
230b1839
YH
1518 err = move_addr_to_user((struct sockaddr *)&address,
1519 len, upeer_sockaddr, upeer_addrlen);
1da177e4 1520 if (err < 0)
39d8c1b6 1521 goto out_fd;
1da177e4
LT
1522 }
1523
1524 /* File flags are not inherited via accept() unlike another OSes. */
1525
39d8c1b6
DM
1526 fd_install(newfd, newfile);
1527 err = newfd;
1da177e4 1528
1da177e4 1529out_put:
6cb153ca 1530 fput_light(sock->file, fput_needed);
1da177e4
LT
1531out:
1532 return err;
39d8c1b6 1533out_fd:
9606a216 1534 fput(newfile);
39d8c1b6 1535 put_unused_fd(newfd);
1da177e4
LT
1536 goto out_put;
1537}
1538
20f37034
HC
1539SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr,
1540 int __user *, upeer_addrlen)
aaca0bdc 1541{
de11defe 1542 return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0);
aaca0bdc
UD
1543}
1544
1da177e4
LT
1545/*
1546 * Attempt to connect to a socket with the server address. The address
1547 * is in user space so we verify it is OK and move it to kernel space.
1548 *
1549 * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1550 * break bindings
1551 *
1552 * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1553 * other SEQPACKET protocols that take time to connect() as it doesn't
1554 * include the -EINPROGRESS status for such sockets.
1555 */
1556
20f37034
HC
1557SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
1558 int, addrlen)
1da177e4
LT
1559{
1560 struct socket *sock;
230b1839 1561 struct sockaddr_storage address;
6cb153ca 1562 int err, fput_needed;
1da177e4 1563
6cb153ca 1564 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1565 if (!sock)
1566 goto out;
230b1839 1567 err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address);
1da177e4
LT
1568 if (err < 0)
1569 goto out_put;
1570
89bddce5 1571 err =
230b1839 1572 security_socket_connect(sock, (struct sockaddr *)&address, addrlen);
1da177e4
LT
1573 if (err)
1574 goto out_put;
1575
230b1839 1576 err = sock->ops->connect(sock, (struct sockaddr *)&address, addrlen,
1da177e4
LT
1577 sock->file->f_flags);
1578out_put:
6cb153ca 1579 fput_light(sock->file, fput_needed);
1da177e4
LT
1580out:
1581 return err;
1582}
1583
1584/*
1585 * Get the local address ('name') of a socket object. Move the obtained
1586 * name to user space.
1587 */
1588
20f37034
HC
1589SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
1590 int __user *, usockaddr_len)
1da177e4
LT
1591{
1592 struct socket *sock;
230b1839 1593 struct sockaddr_storage address;
6cb153ca 1594 int len, err, fput_needed;
89bddce5 1595
6cb153ca 1596 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4
LT
1597 if (!sock)
1598 goto out;
1599
1600 err = security_socket_getsockname(sock);
1601 if (err)
1602 goto out_put;
1603
230b1839 1604 err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0);
1da177e4
LT
1605 if (err)
1606 goto out_put;
230b1839 1607 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len);
1da177e4
LT
1608
1609out_put:
6cb153ca 1610 fput_light(sock->file, fput_needed);
1da177e4
LT
1611out:
1612 return err;
1613}
1614
1615/*
1616 * Get the remote address ('name') of a socket object. Move the obtained
1617 * name to user space.
1618 */
1619
20f37034
HC
1620SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
1621 int __user *, usockaddr_len)
1da177e4
LT
1622{
1623 struct socket *sock;
230b1839 1624 struct sockaddr_storage address;
6cb153ca 1625 int len, err, fput_needed;
1da177e4 1626
89bddce5
SH
1627 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1628 if (sock != NULL) {
1da177e4
LT
1629 err = security_socket_getpeername(sock);
1630 if (err) {
6cb153ca 1631 fput_light(sock->file, fput_needed);
1da177e4
LT
1632 return err;
1633 }
1634
89bddce5 1635 err =
230b1839 1636 sock->ops->getname(sock, (struct sockaddr *)&address, &len,
89bddce5 1637 1);
1da177e4 1638 if (!err)
230b1839 1639 err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr,
89bddce5 1640 usockaddr_len);
6cb153ca 1641 fput_light(sock->file, fput_needed);
1da177e4
LT
1642 }
1643 return err;
1644}
1645
1646/*
1647 * Send a datagram to a given address. We move the address into kernel
1648 * space and check the user space data area is readable before invoking
1649 * the protocol.
1650 */
1651
3e0fa65f
HC
1652SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
1653 unsigned, flags, struct sockaddr __user *, addr,
1654 int, addr_len)
1da177e4
LT
1655{
1656 struct socket *sock;
230b1839 1657 struct sockaddr_storage address;
1da177e4
LT
1658 int err;
1659 struct msghdr msg;
1660 struct iovec iov;
6cb153ca 1661 int fput_needed;
6cb153ca 1662
de0fa95c
PE
1663 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1664 if (!sock)
4387ff75 1665 goto out;
6cb153ca 1666
89bddce5
SH
1667 iov.iov_base = buff;
1668 iov.iov_len = len;
1669 msg.msg_name = NULL;
1670 msg.msg_iov = &iov;
1671 msg.msg_iovlen = 1;
1672 msg.msg_control = NULL;
1673 msg.msg_controllen = 0;
1674 msg.msg_namelen = 0;
6cb153ca 1675 if (addr) {
230b1839 1676 err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address);
1da177e4
LT
1677 if (err < 0)
1678 goto out_put;
230b1839 1679 msg.msg_name = (struct sockaddr *)&address;
89bddce5 1680 msg.msg_namelen = addr_len;
1da177e4
LT
1681 }
1682 if (sock->file->f_flags & O_NONBLOCK)
1683 flags |= MSG_DONTWAIT;
1684 msg.msg_flags = flags;
1685 err = sock_sendmsg(sock, &msg, len);
1686
89bddce5 1687out_put:
de0fa95c 1688 fput_light(sock->file, fput_needed);
4387ff75 1689out:
1da177e4
LT
1690 return err;
1691}
1692
1693/*
89bddce5 1694 * Send a datagram down a socket.
1da177e4
LT
1695 */
1696
3e0fa65f
HC
1697SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len,
1698 unsigned, flags)
1da177e4
LT
1699{
1700 return sys_sendto(fd, buff, len, flags, NULL, 0);
1701}
1702
1703/*
89bddce5 1704 * Receive a frame from the socket and optionally record the address of the
1da177e4
LT
1705 * sender. We verify the buffers are writable and if needed move the
1706 * sender address from kernel to user space.
1707 */
1708
3e0fa65f
HC
1709SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
1710 unsigned, flags, struct sockaddr __user *, addr,
1711 int __user *, addr_len)
1da177e4
LT
1712{
1713 struct socket *sock;
1714 struct iovec iov;
1715 struct msghdr msg;
230b1839 1716 struct sockaddr_storage address;
89bddce5 1717 int err, err2;
6cb153ca
BL
1718 int fput_needed;
1719
de0fa95c 1720 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1da177e4 1721 if (!sock)
de0fa95c 1722 goto out;
1da177e4 1723
89bddce5
SH
1724 msg.msg_control = NULL;
1725 msg.msg_controllen = 0;
1726 msg.msg_iovlen = 1;
1727 msg.msg_iov = &iov;
1728 iov.iov_len = size;
1729 iov.iov_base = ubuf;
230b1839
YH
1730 msg.msg_name = (struct sockaddr *)&address;
1731 msg.msg_namelen = sizeof(address);
1da177e4
LT
1732 if (sock->file->f_flags & O_NONBLOCK)
1733 flags |= MSG_DONTWAIT;
89bddce5 1734 err = sock_recvmsg(sock, &msg, size, flags);
1da177e4 1735
89bddce5 1736 if (err >= 0 && addr != NULL) {
230b1839
YH
1737 err2 = move_addr_to_user((struct sockaddr *)&address,
1738 msg.msg_namelen, addr, addr_len);
89bddce5
SH
1739 if (err2 < 0)
1740 err = err2;
1da177e4 1741 }
de0fa95c
PE
1742
1743 fput_light(sock->file, fput_needed);
4387ff75 1744out:
1da177e4
LT
1745 return err;
1746}
1747
1748/*
89bddce5 1749 * Receive a datagram from a socket.
1da177e4
LT
1750 */
1751
89bddce5
SH
1752asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size,
1753 unsigned flags)
1da177e4
LT
1754{
1755 return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1756}
1757
1758/*
1759 * Set a socket option. Because we don't know the option lengths we have
1760 * to pass the user mode parameter for the protocols to sort out.
1761 */
1762
20f37034
HC
1763SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
1764 char __user *, optval, int, optlen)
1da177e4 1765{
6cb153ca 1766 int err, fput_needed;
1da177e4
LT
1767 struct socket *sock;
1768
1769 if (optlen < 0)
1770 return -EINVAL;
89bddce5
SH
1771
1772 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1773 if (sock != NULL) {
1774 err = security_socket_setsockopt(sock, level, optname);
6cb153ca
BL
1775 if (err)
1776 goto out_put;
1da177e4
LT
1777
1778 if (level == SOL_SOCKET)
89bddce5
SH
1779 err =
1780 sock_setsockopt(sock, level, optname, optval,
1781 optlen);
1da177e4 1782 else
89bddce5
SH
1783 err =
1784 sock->ops->setsockopt(sock, level, optname, optval,
1785 optlen);
6cb153ca
BL
1786out_put:
1787 fput_light(sock->file, fput_needed);
1da177e4
LT
1788 }
1789 return err;
1790}
1791
1792/*
1793 * Get a socket option. Because we don't know the option lengths we have
1794 * to pass a user mode parameter for the protocols to sort out.
1795 */
1796
20f37034
HC
1797SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
1798 char __user *, optval, int __user *, optlen)
1da177e4 1799{
6cb153ca 1800 int err, fput_needed;
1da177e4
LT
1801 struct socket *sock;
1802
89bddce5
SH
1803 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1804 if (sock != NULL) {
6cb153ca
BL
1805 err = security_socket_getsockopt(sock, level, optname);
1806 if (err)
1807 goto out_put;
1da177e4
LT
1808
1809 if (level == SOL_SOCKET)
89bddce5
SH
1810 err =
1811 sock_getsockopt(sock, level, optname, optval,
1812 optlen);
1da177e4 1813 else
89bddce5
SH
1814 err =
1815 sock->ops->getsockopt(sock, level, optname, optval,
1816 optlen);
6cb153ca
BL
1817out_put:
1818 fput_light(sock->file, fput_needed);
1da177e4
LT
1819 }
1820 return err;
1821}
1822
1da177e4
LT
1823/*
1824 * Shutdown a socket.
1825 */
1826
754fe8d2 1827SYSCALL_DEFINE2(shutdown, int, fd, int, how)
1da177e4 1828{
6cb153ca 1829 int err, fput_needed;
1da177e4
LT
1830 struct socket *sock;
1831
89bddce5
SH
1832 sock = sockfd_lookup_light(fd, &err, &fput_needed);
1833 if (sock != NULL) {
1da177e4 1834 err = security_socket_shutdown(sock, how);
6cb153ca
BL
1835 if (!err)
1836 err = sock->ops->shutdown(sock, how);
1837 fput_light(sock->file, fput_needed);
1da177e4
LT
1838 }
1839 return err;
1840}
1841
/* A couple of helpful macros for getting the address of the 32/64 bit
 * fields which are the same type (int / unsigned) on our platforms.
 *
 * They expand in the caller's scope and expect three names to exist there:
 * 'flags', the native header pointer 'msg', and its compat view
 * 'msg_compat' (formed by pasting "_compat" onto the first argument).
 */
#define COMPAT_MSG(msg, member)					\
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member	\
				   : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1848
1da177e4
LT
1849/*
1850 * BSD sendmsg interface
1851 */
1852
3e0fa65f 1853SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1da177e4 1854{
89bddce5
SH
1855 struct compat_msghdr __user *msg_compat =
1856 (struct compat_msghdr __user *)msg;
1da177e4 1857 struct socket *sock;
230b1839 1858 struct sockaddr_storage address;
1da177e4 1859 struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
b9d717a7 1860 unsigned char ctl[sizeof(struct cmsghdr) + 20]
89bddce5
SH
1861 __attribute__ ((aligned(sizeof(__kernel_size_t))));
1862 /* 20 is size of ipv6_pktinfo */
1da177e4
LT
1863 unsigned char *ctl_buf = ctl;
1864 struct msghdr msg_sys;
1865 int err, ctl_len, iov_size, total_len;
6cb153ca 1866 int fput_needed;
89bddce5 1867
1da177e4
LT
1868 err = -EFAULT;
1869 if (MSG_CMSG_COMPAT & flags) {
1870 if (get_compat_msghdr(&msg_sys, msg_compat))
1871 return -EFAULT;
c6d409cf 1872 } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1da177e4
LT
1873 return -EFAULT;
1874
6cb153ca 1875 sock = sockfd_lookup_light(fd, &err, &fput_needed);
89bddce5 1876 if (!sock)
1da177e4
LT
1877 goto out;
1878
1879 /* do not move before msg_sys is valid */
1880 err = -EMSGSIZE;
1881 if (msg_sys.msg_iovlen > UIO_MAXIOV)
1882 goto out_put;
1883
89bddce5 1884 /* Check whether to allocate the iovec area */
1da177e4
LT
1885 err = -ENOMEM;
1886 iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1887 if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1888 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1889 if (!iov)
1890 goto out_put;
1891 }
1892
1893 /* This will also move the address data into kernel space */
1894 if (MSG_CMSG_COMPAT & flags) {
230b1839
YH
1895 err = verify_compat_iovec(&msg_sys, iov,
1896 (struct sockaddr *)&address,
1897 VERIFY_READ);
1da177e4 1898 } else
230b1839
YH
1899 err = verify_iovec(&msg_sys, iov,
1900 (struct sockaddr *)&address,
1901 VERIFY_READ);
89bddce5 1902 if (err < 0)
1da177e4
LT
1903 goto out_freeiov;
1904 total_len = err;
1905
1906 err = -ENOBUFS;
1907
1908 if (msg_sys.msg_controllen > INT_MAX)
1909 goto out_freeiov;
89bddce5 1910 ctl_len = msg_sys.msg_controllen;
1da177e4 1911 if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
89bddce5
SH
1912 err =
1913 cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl,
1914 sizeof(ctl));
1da177e4
LT
1915 if (err)
1916 goto out_freeiov;
1917 ctl_buf = msg_sys.msg_control;
8920e8f9 1918 ctl_len = msg_sys.msg_controllen;
1da177e4 1919 } else if (ctl_len) {
89bddce5 1920 if (ctl_len > sizeof(ctl)) {
1da177e4 1921 ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
89bddce5 1922 if (ctl_buf == NULL)
1da177e4
LT
1923 goto out_freeiov;
1924 }
1925 err = -EFAULT;
1926 /*
1927 * Careful! Before this, msg_sys.msg_control contains a user pointer.
1928 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1929 * checking falls down on this.
1930 */
89bddce5
SH
1931 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control,
1932 ctl_len))
1da177e4
LT
1933 goto out_freectl;
1934 msg_sys.msg_control = ctl_buf;
1935 }
1936 msg_sys.msg_flags = flags;
1937
1938 if (sock->file->f_flags & O_NONBLOCK)
1939 msg_sys.msg_flags |= MSG_DONTWAIT;
1940 err = sock_sendmsg(sock, &msg_sys, total_len);
1941
1942out_freectl:
89bddce5 1943 if (ctl_buf != ctl)
1da177e4
LT
1944 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1945out_freeiov:
1946 if (iov != iovstack)
1947 sock_kfree_s(sock->sk, iov, iov_size);
1948out_put:
6cb153ca 1949 fput_light(sock->file, fput_needed);
89bddce5 1950out:
1da177e4
LT
1951 return err;
1952}
1953
a2e27255
ACM
1954static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
1955 struct msghdr *msg_sys, unsigned flags, int nosec)
1da177e4 1956{
89bddce5
SH
1957 struct compat_msghdr __user *msg_compat =
1958 (struct compat_msghdr __user *)msg;
1da177e4 1959 struct iovec iovstack[UIO_FASTIOV];
89bddce5 1960 struct iovec *iov = iovstack;
1da177e4
LT
1961 unsigned long cmsg_ptr;
1962 int err, iov_size, total_len, len;
1963
1964 /* kernel mode address */
230b1839 1965 struct sockaddr_storage addr;
1da177e4
LT
1966
1967 /* user mode address pointers */
1968 struct sockaddr __user *uaddr;
1969 int __user *uaddr_len;
89bddce5 1970
1da177e4 1971 if (MSG_CMSG_COMPAT & flags) {
a2e27255 1972 if (get_compat_msghdr(msg_sys, msg_compat))
1da177e4 1973 return -EFAULT;
c6d409cf 1974 } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
89bddce5 1975 return -EFAULT;
1da177e4 1976
1da177e4 1977 err = -EMSGSIZE;
a2e27255
ACM
1978 if (msg_sys->msg_iovlen > UIO_MAXIOV)
1979 goto out;
89bddce5
SH
1980
1981 /* Check whether to allocate the iovec area */
1da177e4 1982 err = -ENOMEM;
a2e27255
ACM
1983 iov_size = msg_sys->msg_iovlen * sizeof(struct iovec);
1984 if (msg_sys->msg_iovlen > UIO_FASTIOV) {
1da177e4
LT
1985 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1986 if (!iov)
a2e27255 1987 goto out;
1da177e4
LT
1988 }
1989
1990 /*
89bddce5
SH
1991 * Save the user-mode address (verify_iovec will change the
1992 * kernel msghdr to use the kernel address space)
1da177e4 1993 */
89bddce5 1994
a2e27255 1995 uaddr = (__force void __user *)msg_sys->msg_name;
1da177e4
LT
1996 uaddr_len = COMPAT_NAMELEN(msg);
1997 if (MSG_CMSG_COMPAT & flags) {
a2e27255 1998 err = verify_compat_iovec(msg_sys, iov,
230b1839
YH
1999 (struct sockaddr *)&addr,
2000 VERIFY_WRITE);
1da177e4 2001 } else
a2e27255 2002 err = verify_iovec(msg_sys, iov,
230b1839
YH
2003 (struct sockaddr *)&addr,
2004 VERIFY_WRITE);
1da177e4
LT
2005 if (err < 0)
2006 goto out_freeiov;
89bddce5 2007 total_len = err;
1da177e4 2008
a2e27255
ACM
2009 cmsg_ptr = (unsigned long)msg_sys->msg_control;
2010 msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
89bddce5 2011
1da177e4
LT
2012 if (sock->file->f_flags & O_NONBLOCK)
2013 flags |= MSG_DONTWAIT;
a2e27255
ACM
2014 err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
2015 total_len, flags);
1da177e4
LT
2016 if (err < 0)
2017 goto out_freeiov;
2018 len = err;
2019
2020 if (uaddr != NULL) {
230b1839 2021 err = move_addr_to_user((struct sockaddr *)&addr,
a2e27255 2022 msg_sys->msg_namelen, uaddr,
89bddce5 2023 uaddr_len);
1da177e4
LT
2024 if (err < 0)
2025 goto out_freeiov;
2026 }
a2e27255 2027 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT),
37f7f421 2028 COMPAT_FLAGS(msg));
1da177e4
LT
2029 if (err)
2030 goto out_freeiov;
2031 if (MSG_CMSG_COMPAT & flags)
a2e27255 2032 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2033 &msg_compat->msg_controllen);
2034 else
a2e27255 2035 err = __put_user((unsigned long)msg_sys->msg_control - cmsg_ptr,
1da177e4
LT
2036 &msg->msg_controllen);
2037 if (err)
2038 goto out_freeiov;
2039 err = len;
2040
2041out_freeiov:
2042 if (iov != iovstack)
2043 sock_kfree_s(sock->sk, iov, iov_size);
a2e27255
ACM
2044out:
2045 return err;
2046}
2047
2048/*
2049 * BSD recvmsg interface
2050 */
2051
2052SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg,
2053 unsigned int, flags)
2054{
2055 int fput_needed, err;
2056 struct msghdr msg_sys;
2057 struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed);
2058
2059 if (!sock)
2060 goto out;
2061
2062 err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0);
2063
6cb153ca 2064 fput_light(sock->file, fput_needed);
1da177e4
LT
2065out:
2066 return err;
2067}
2068
a2e27255
ACM
2069/*
2070 * Linux recvmmsg interface
2071 */
2072
2073int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
2074 unsigned int flags, struct timespec *timeout)
2075{
2076 int fput_needed, err, datagrams;
2077 struct socket *sock;
2078 struct mmsghdr __user *entry;
d7256d0e 2079 struct compat_mmsghdr __user *compat_entry;
a2e27255
ACM
2080 struct msghdr msg_sys;
2081 struct timespec end_time;
2082
2083 if (timeout &&
2084 poll_select_set_timeout(&end_time, timeout->tv_sec,
2085 timeout->tv_nsec))
2086 return -EINVAL;
2087
2088 datagrams = 0;
2089
2090 sock = sockfd_lookup_light(fd, &err, &fput_needed);
2091 if (!sock)
2092 return err;
2093
2094 err = sock_error(sock->sk);
2095 if (err)
2096 goto out_put;
2097
2098 entry = mmsg;
d7256d0e 2099 compat_entry = (struct compat_mmsghdr __user *)mmsg;
a2e27255
ACM
2100
2101 while (datagrams < vlen) {
2102 /*
2103 * No need to ask LSM for more than the first datagram.
2104 */
d7256d0e
JMG
2105 if (MSG_CMSG_COMPAT & flags) {
2106 err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry,
2107 &msg_sys, flags, datagrams);
2108 if (err < 0)
2109 break;
2110 err = __put_user(err, &compat_entry->msg_len);
2111 ++compat_entry;
2112 } else {
2113 err = __sys_recvmsg(sock, (struct msghdr __user *)entry,
2114 &msg_sys, flags, datagrams);
2115 if (err < 0)
2116 break;
2117 err = put_user(err, &entry->msg_len);
2118 ++entry;
2119 }
2120
a2e27255
ACM
2121 if (err)
2122 break;
a2e27255
ACM
2123 ++datagrams;
2124
71c5c159
BB
2125 /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */
2126 if (flags & MSG_WAITFORONE)
2127 flags |= MSG_DONTWAIT;
2128
a2e27255
ACM
2129 if (timeout) {
2130 ktime_get_ts(timeout);
2131 *timeout = timespec_sub(end_time, *timeout);
2132 if (timeout->tv_sec < 0) {
2133 timeout->tv_sec = timeout->tv_nsec = 0;
2134 break;
2135 }
2136
2137 /* Timeout, return less than vlen datagrams */
2138 if (timeout->tv_nsec == 0 && timeout->tv_sec == 0)
2139 break;
2140 }
2141
2142 /* Out of band data, return right away */
2143 if (msg_sys.msg_flags & MSG_OOB)
2144 break;
2145 }
2146
2147out_put:
2148 fput_light(sock->file, fput_needed);
1da177e4 2149
a2e27255
ACM
2150 if (err == 0)
2151 return datagrams;
2152
2153 if (datagrams != 0) {
2154 /*
2155 * We may return less entries than requested (vlen) if the
2156 * sock is non block and there aren't enough datagrams...
2157 */
2158 if (err != -EAGAIN) {
2159 /*
2160 * ... or if recvmsg returns an error after we
2161 * received some datagrams, where we record the
2162 * error to return on the next call or if the
2163 * app asks about it using getsockopt(SO_ERROR).
2164 */
2165 sock->sk->sk_err = -err;
2166 }
2167
2168 return datagrams;
2169 }
2170
2171 return err;
2172}
2173
2174SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
2175 unsigned int, vlen, unsigned int, flags,
2176 struct timespec __user *, timeout)
2177{
2178 int datagrams;
2179 struct timespec timeout_sys;
2180
2181 if (!timeout)
2182 return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
2183
2184 if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
2185 return -EFAULT;
2186
2187 datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
2188
2189 if (datagrams > 0 &&
2190 copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
2191 datagrams = -EFAULT;
2192
2193 return datagrams;
2194}
2195
#ifdef __ARCH_WANT_SYS_SOCKETCALL
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call
 * number.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static const unsigned char nargs[20] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3),
	AL(4), AL(5)
};

#undef AL

/*
 *	System call vectors.
 *
 *	Copies the argument block for @call from user space (its size is
 *	taken from nargs[]), reports it to audit and dispatches to the
 *	matching sys_* function.  Unknown call numbers yield -EINVAL.
 */

SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
	unsigned long a[6];
	unsigned long a0, a1;
	unsigned int len;

	if (call < 1 || call > SYS_RECVMMSG)
		return -EINVAL;

	/* Never copy more than the local argument array can hold. */
	len = nargs[call];
	if (len > sizeof(a))
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, len))
		return -EFAULT;

	audit_socketcall(nargs[call] / sizeof(unsigned long), a);

	a0 = a[0];
	a1 = a[1];

	switch (call) {
	case SYS_SOCKET:
		return sys_socket(a0, a1, a[2]);
	case SYS_BIND:
		return sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
	case SYS_CONNECT:
		return sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
	case SYS_LISTEN:
		return sys_listen(a0, a1);
	case SYS_ACCEPT:
		return sys_accept4(a0, (struct sockaddr __user *)a1,
				   (int __user *)a[2], 0);
	case SYS_GETSOCKNAME:
		return sys_getsockname(a0, (struct sockaddr __user *)a1,
				       (int __user *)a[2]);
	case SYS_GETPEERNAME:
		return sys_getpeername(a0, (struct sockaddr __user *)a1,
				       (int __user *)a[2]);
	case SYS_SOCKETPAIR:
		return sys_socketpair(a0, a1, a[2], (int __user *)a[3]);
	case SYS_SEND:
		return sys_send(a0, (void __user *)a1, a[2], a[3]);
	case SYS_SENDTO:
		return sys_sendto(a0, (void __user *)a1, a[2], a[3],
				  (struct sockaddr __user *)a[4], a[5]);
	case SYS_RECV:
		return sys_recv(a0, (void __user *)a1, a[2], a[3]);
	case SYS_RECVFROM:
		return sys_recvfrom(a0, (void __user *)a1, a[2], a[3],
				    (struct sockaddr __user *)a[4],
				    (int __user *)a[5]);
	case SYS_SHUTDOWN:
		return sys_shutdown(a0, a1);
	case SYS_SETSOCKOPT:
		return sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]);
	case SYS_GETSOCKOPT:
		return sys_getsockopt(a0, a1, a[2], (char __user *)a[3],
				      (int __user *)a[4]);
	case SYS_SENDMSG:
		return sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]);
	case SYS_RECVMSG:
		return sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]);
	case SYS_RECVMMSG:
		return sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3],
				    (struct timespec __user *)a[4]);
	case SYS_ACCEPT4:
		return sys_accept4(a0, (struct sockaddr __user *)a1,
				   (int __user *)a[2], a[3]);
	default:
		return -EINVAL;
	}
}

#endif				/* __ARCH_WANT_SYS_SOCKETCALL */
1da177e4 2317
55737fda
SH
2318/**
2319 * sock_register - add a socket protocol handler
2320 * @ops: description of protocol
2321 *
1da177e4
LT
2322 * This function is called by a protocol handler that wants to
2323 * advertise its address family, and have it linked into the
55737fda
SH
2324 * socket interface. The value ops->family coresponds to the
2325 * socket system call protocol family.
1da177e4 2326 */
f0fd27d4 2327int sock_register(const struct net_proto_family *ops)
1da177e4
LT
2328{
2329 int err;
2330
2331 if (ops->family >= NPROTO) {
89bddce5
SH
2332 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family,
2333 NPROTO);
1da177e4
LT
2334 return -ENOBUFS;
2335 }
55737fda
SH
2336
2337 spin_lock(&net_family_lock);
2338 if (net_families[ops->family])
2339 err = -EEXIST;
2340 else {
89bddce5 2341 net_families[ops->family] = ops;
1da177e4
LT
2342 err = 0;
2343 }
55737fda
SH
2344 spin_unlock(&net_family_lock);
2345
89bddce5 2346 printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family);
1da177e4
LT
2347 return err;
2348}
c6d409cf 2349EXPORT_SYMBOL(sock_register);
1da177e4 2350
55737fda
SH
2351/**
2352 * sock_unregister - remove a protocol handler
2353 * @family: protocol family to remove
2354 *
1da177e4
LT
2355 * This function is called by a protocol handler that wants to
2356 * remove its address family, and have it unlinked from the
55737fda
SH
2357 * new socket creation.
2358 *
2359 * If protocol handler is a module, then it can use module reference
2360 * counts to protect against new references. If protocol handler is not
2361 * a module then it needs to provide its own protection in
2362 * the ops->create routine.
1da177e4 2363 */
f0fd27d4 2364void sock_unregister(int family)
1da177e4 2365{
f0fd27d4 2366 BUG_ON(family < 0 || family >= NPROTO);
1da177e4 2367
55737fda 2368 spin_lock(&net_family_lock);
89bddce5 2369 net_families[family] = NULL;
55737fda
SH
2370 spin_unlock(&net_family_lock);
2371
2372 synchronize_rcu();
2373
89bddce5 2374 printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
1da177e4 2375}
c6d409cf 2376EXPORT_SYMBOL(sock_unregister);
1da177e4 2377
77d76ea3 2378static int __init sock_init(void)
1da177e4
LT
2379{
2380 /*
89bddce5 2381 * Initialize sock SLAB cache.
1da177e4 2382 */
89bddce5 2383
1da177e4
LT
2384 sk_init();
2385
1da177e4 2386 /*
89bddce5 2387 * Initialize skbuff SLAB cache
1da177e4
LT
2388 */
2389 skb_init();
1da177e4
LT
2390
2391 /*
89bddce5 2392 * Initialize the protocols module.
1da177e4
LT
2393 */
2394
2395 init_inodecache();
2396 register_filesystem(&sock_fs_type);
2397 sock_mnt = kern_mount(&sock_fs_type);
77d76ea3
AK
2398
2399 /* The real protocol initialization is performed in later initcalls.
1da177e4
LT
2400 */
2401
2402#ifdef CONFIG_NETFILTER
2403 netfilter_init();
2404#endif
cbeb321a
DM
2405
2406 return 0;
1da177e4
LT
2407}
2408
77d76ea3
AK
2409core_initcall(sock_init); /* early initcall */
2410
1da177e4
LT
#ifdef CONFIG_PROC_FS
/*
 * Print the number of sockets in use, summed over the per-cpu
 * sockets_in_use counters of all possible CPUs, to @seq.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		total += per_cpu(sockets_in_use, cpu);

	/* The sum can be negative; report zero in that case. */
	seq_printf(seq, "sockets: used %d\n", total < 0 ? 0 : total);
}
#endif				/* CONFIG_PROC_FS */
1da177e4 2427
89bbfc95 2428#ifdef CONFIG_COMPAT
6b96018b
AB
2429static int do_siocgstamp(struct net *net, struct socket *sock,
2430 unsigned int cmd, struct compat_timeval __user *up)
7a229387 2431{
7a229387
AB
2432 mm_segment_t old_fs = get_fs();
2433 struct timeval ktv;
2434 int err;
2435
2436 set_fs(KERNEL_DS);
6b96018b 2437 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv);
7a229387
AB
2438 set_fs(old_fs);
2439 if (!err) {
2440 err = put_user(ktv.tv_sec, &up->tv_sec);
2441 err |= __put_user(ktv.tv_usec, &up->tv_usec);
2442 }
2443 return err;
2444}
2445
6b96018b
AB
2446static int do_siocgstampns(struct net *net, struct socket *sock,
2447 unsigned int cmd, struct compat_timespec __user *up)
7a229387 2448{
7a229387
AB
2449 mm_segment_t old_fs = get_fs();
2450 struct timespec kts;
2451 int err;
2452
2453 set_fs(KERNEL_DS);
6b96018b 2454 err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts);
7a229387
AB
2455 set_fs(old_fs);
2456 if (!err) {
2457 err = put_user(kts.tv_sec, &up->tv_sec);
2458 err |= __put_user(kts.tv_nsec, &up->tv_nsec);
2459 }
2460 return err;
2461}
2462
6b96018b 2463static int dev_ifname32(struct net *net, struct compat_ifreq __user *uifr32)
7a229387
AB
2464{
2465 struct ifreq __user *uifr;
2466 int err;
2467
2468 uifr = compat_alloc_user_space(sizeof(struct ifreq));
6b96018b 2469 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2470 return -EFAULT;
2471
6b96018b 2472 err = dev_ioctl(net, SIOCGIFNAME, uifr);
7a229387
AB
2473 if (err)
2474 return err;
2475
6b96018b 2476 if (copy_in_user(uifr32, uifr, sizeof(struct compat_ifreq)))
7a229387
AB
2477 return -EFAULT;
2478
2479 return 0;
2480}
2481
6b96018b 2482static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32)
7a229387 2483{
6b96018b 2484 struct compat_ifconf ifc32;
7a229387
AB
2485 struct ifconf ifc;
2486 struct ifconf __user *uifc;
6b96018b 2487 struct compat_ifreq __user *ifr32;
7a229387
AB
2488 struct ifreq __user *ifr;
2489 unsigned int i, j;
2490 int err;
2491
6b96018b 2492 if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2493 return -EFAULT;
2494
2495 if (ifc32.ifcbuf == 0) {
2496 ifc32.ifc_len = 0;
2497 ifc.ifc_len = 0;
2498 ifc.ifc_req = NULL;
2499 uifc = compat_alloc_user_space(sizeof(struct ifconf));
2500 } else {
c6d409cf
ED
2501 size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) *
2502 sizeof(struct ifreq);
7a229387
AB
2503 uifc = compat_alloc_user_space(sizeof(struct ifconf) + len);
2504 ifc.ifc_len = len;
2505 ifr = ifc.ifc_req = (void __user *)(uifc + 1);
2506 ifr32 = compat_ptr(ifc32.ifcbuf);
c6d409cf 2507 for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) {
6b96018b 2508 if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2509 return -EFAULT;
2510 ifr++;
2511 ifr32++;
2512 }
2513 }
2514 if (copy_to_user(uifc, &ifc, sizeof(struct ifconf)))
2515 return -EFAULT;
2516
6b96018b 2517 err = dev_ioctl(net, SIOCGIFCONF, uifc);
7a229387
AB
2518 if (err)
2519 return err;
2520
2521 if (copy_from_user(&ifc, uifc, sizeof(struct ifconf)))
2522 return -EFAULT;
2523
2524 ifr = ifc.ifc_req;
2525 ifr32 = compat_ptr(ifc32.ifcbuf);
2526 for (i = 0, j = 0;
c6d409cf
ED
2527 i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len;
2528 i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) {
2529 if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq)))
7a229387
AB
2530 return -EFAULT;
2531 ifr32++;
2532 ifr++;
2533 }
2534
2535 if (ifc32.ifcbuf == 0) {
2536 /* Translate from 64-bit structure multiple to
2537 * a 32-bit one.
2538 */
2539 i = ifc.ifc_len;
6b96018b 2540 i = ((i / sizeof(struct ifreq)) * sizeof(struct compat_ifreq));
7a229387
AB
2541 ifc32.ifc_len = i;
2542 } else {
2543 ifc32.ifc_len = i;
2544 }
6b96018b 2545 if (copy_to_user(uifc32, &ifc32, sizeof(struct compat_ifconf)))
7a229387
AB
2546 return -EFAULT;
2547
2548 return 0;
2549}
2550
6b96018b 2551static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
7a229387
AB
2552{
2553 struct ifreq __user *ifr;
7a229387
AB
2554 u32 data;
2555 void __user *datap;
2556
2557 ifr = compat_alloc_user_space(sizeof(*ifr));
7a229387
AB
2558
2559 if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2560 return -EFAULT;
2561
2562 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2563 return -EFAULT;
2564
2565 datap = compat_ptr(data);
2566 if (put_user(datap, &ifr->ifr_ifru.ifru_data))
2567 return -EFAULT;
2568
6b96018b 2569 return dev_ioctl(net, SIOCETHTOOL, ifr);
7a229387
AB
2570}
2571
7a50a240
AB
2572static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32)
2573{
2574 void __user *uptr;
2575 compat_uptr_t uptr32;
2576 struct ifreq __user *uifr;
2577
c6d409cf 2578 uifr = compat_alloc_user_space(sizeof(*uifr));
7a50a240
AB
2579 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2580 return -EFAULT;
2581
2582 if (get_user(uptr32, &uifr32->ifr_settings.ifs_ifsu))
2583 return -EFAULT;
2584
2585 uptr = compat_ptr(uptr32);
2586
2587 if (put_user(uptr, &uifr->ifr_settings.ifs_ifsu.raw_hdlc))
2588 return -EFAULT;
2589
2590 return dev_ioctl(net, SIOCWANDEV, uifr);
2591}
2592
6b96018b
AB
2593static int bond_ioctl(struct net *net, unsigned int cmd,
2594 struct compat_ifreq __user *ifr32)
7a229387
AB
2595{
2596 struct ifreq kifr;
2597 struct ifreq __user *uifr;
7a229387
AB
2598 mm_segment_t old_fs;
2599 int err;
2600 u32 data;
2601 void __user *datap;
2602
2603 switch (cmd) {
2604 case SIOCBONDENSLAVE:
2605 case SIOCBONDRELEASE:
2606 case SIOCBONDSETHWADDR:
2607 case SIOCBONDCHANGEACTIVE:
6b96018b 2608 if (copy_from_user(&kifr, ifr32, sizeof(struct compat_ifreq)))
7a229387
AB
2609 return -EFAULT;
2610
2611 old_fs = get_fs();
c6d409cf 2612 set_fs(KERNEL_DS);
6b96018b 2613 err = dev_ioctl(net, cmd, &kifr);
c6d409cf 2614 set_fs(old_fs);
7a229387
AB
2615
2616 return err;
2617 case SIOCBONDSLAVEINFOQUERY:
2618 case SIOCBONDINFOQUERY:
2619 uifr = compat_alloc_user_space(sizeof(*uifr));
2620 if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ))
2621 return -EFAULT;
2622
2623 if (get_user(data, &ifr32->ifr_ifru.ifru_data))
2624 return -EFAULT;
2625
2626 datap = compat_ptr(data);
2627 if (put_user(datap, &uifr->ifr_ifru.ifru_data))
2628 return -EFAULT;
2629
6b96018b 2630 return dev_ioctl(net, cmd, uifr);
7a229387
AB
2631 default:
2632 return -EINVAL;
ccbd6a5a 2633 }
7a229387
AB
2634}
2635
6b96018b
AB
2636static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
2637 struct compat_ifreq __user *u_ifreq32)
7a229387
AB
2638{
2639 struct ifreq __user *u_ifreq64;
7a229387
AB
2640 char tmp_buf[IFNAMSIZ];
2641 void __user *data64;
2642 u32 data32;
2643
2644 if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]),
2645 IFNAMSIZ))
2646 return -EFAULT;
2647 if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data))
2648 return -EFAULT;
2649 data64 = compat_ptr(data32);
2650
2651 u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64));
2652
2653 /* Don't check these user accesses, just let that get trapped
2654 * in the ioctl handler instead.
2655 */
2656 if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0],
2657 IFNAMSIZ))
2658 return -EFAULT;
2659 if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data))
2660 return -EFAULT;
2661
6b96018b 2662 return dev_ioctl(net, cmd, u_ifreq64);
7a229387
AB
2663}
2664
6b96018b
AB
2665static int dev_ifsioc(struct net *net, struct socket *sock,
2666 unsigned int cmd, struct compat_ifreq __user *uifr32)
7a229387 2667{
a2116ed2 2668 struct ifreq __user *uifr;
7a229387
AB
2669 int err;
2670
a2116ed2
AB
2671 uifr = compat_alloc_user_space(sizeof(*uifr));
2672 if (copy_in_user(uifr, uifr32, sizeof(*uifr32)))
2673 return -EFAULT;
2674
2675 err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr);
2676
7a229387
AB
2677 if (!err) {
2678 switch (cmd) {
2679 case SIOCGIFFLAGS:
2680 case SIOCGIFMETRIC:
2681 case SIOCGIFMTU:
2682 case SIOCGIFMEM:
2683 case SIOCGIFHWADDR:
2684 case SIOCGIFINDEX:
2685 case SIOCGIFADDR:
2686 case SIOCGIFBRDADDR:
2687 case SIOCGIFDSTADDR:
2688 case SIOCGIFNETMASK:
fab2532b 2689 case SIOCGIFPFLAGS:
7a229387 2690 case SIOCGIFTXQLEN:
fab2532b
AB
2691 case SIOCGMIIPHY:
2692 case SIOCGMIIREG:
a2116ed2 2693 if (copy_in_user(uifr32, uifr, sizeof(*uifr32)))
7a229387
AB
2694 err = -EFAULT;
2695 break;
2696 }
2697 }
2698 return err;
2699}
2700
a2116ed2
AB
2701static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
2702 struct compat_ifreq __user *uifr32)
2703{
2704 struct ifreq ifr;
2705 struct compat_ifmap __user *uifmap32;
2706 mm_segment_t old_fs;
2707 int err;
2708
2709 uifmap32 = &uifr32->ifr_ifru.ifru_map;
2710 err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
2711 err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2712 err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2713 err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2714 err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
2715 err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
2716 err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
2717 if (err)
2718 return -EFAULT;
2719
2720 old_fs = get_fs();
c6d409cf 2721 set_fs(KERNEL_DS);
a2116ed2 2722 err = dev_ioctl(net, cmd, (void __user *)&ifr);
c6d409cf 2723 set_fs(old_fs);
a2116ed2
AB
2724
2725 if (cmd == SIOCGIFMAP && !err) {
2726 err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
2727 err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
2728 err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
2729 err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
2730 err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
2731 err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
2732 err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
2733 if (err)
2734 err = -EFAULT;
2735 }
2736 return err;
2737}
2738
2739static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32)
2740{
2741 void __user *uptr;
2742 compat_uptr_t uptr32;
2743 struct ifreq __user *uifr;
2744
c6d409cf 2745 uifr = compat_alloc_user_space(sizeof(*uifr));
a2116ed2
AB
2746 if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq)))
2747 return -EFAULT;
2748
2749 if (get_user(uptr32, &uifr32->ifr_data))
2750 return -EFAULT;
2751
2752 uptr = compat_ptr(uptr32);
2753
2754 if (put_user(uptr, &uifr->ifr_data))
2755 return -EFAULT;
2756
2757 return dev_ioctl(net, SIOCSHWTSTAMP, uifr);
2758}
2759
7a229387 2760struct rtentry32 {
c6d409cf 2761 u32 rt_pad1;
7a229387
AB
2762 struct sockaddr rt_dst; /* target address */
2763 struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */
2764 struct sockaddr rt_genmask; /* target network mask (IP) */
c6d409cf
ED
2765 unsigned short rt_flags;
2766 short rt_pad2;
2767 u32 rt_pad3;
2768 unsigned char rt_tos;
2769 unsigned char rt_class;
2770 short rt_pad4;
2771 short rt_metric; /* +1 for binary compatibility! */
7a229387 2772 /* char * */ u32 rt_dev; /* forcing the device at add */
c6d409cf
ED
2773 u32 rt_mtu; /* per route MTU/Window */
2774 u32 rt_window; /* Window clamping */
7a229387
AB
2775 unsigned short rt_irtt; /* Initial RTT */
2776};
2777
2778struct in6_rtmsg32 {
2779 struct in6_addr rtmsg_dst;
2780 struct in6_addr rtmsg_src;
2781 struct in6_addr rtmsg_gateway;
2782 u32 rtmsg_type;
2783 u16 rtmsg_dst_len;
2784 u16 rtmsg_src_len;
2785 u32 rtmsg_metric;
2786 u32 rtmsg_info;
2787 u32 rtmsg_flags;
2788 s32 rtmsg_ifindex;
2789};
2790
6b96018b
AB
2791static int routing_ioctl(struct net *net, struct socket *sock,
2792 unsigned int cmd, void __user *argp)
7a229387
AB
2793{
2794 int ret;
2795 void *r = NULL;
2796 struct in6_rtmsg r6;
2797 struct rtentry r4;
2798 char devname[16];
2799 u32 rtdev;
2800 mm_segment_t old_fs = get_fs();
2801
6b96018b
AB
2802 if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
2803 struct in6_rtmsg32 __user *ur6 = argp;
c6d409cf 2804 ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
7a229387 2805 3 * sizeof(struct in6_addr));
c6d409cf
ED
2806 ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
2807 ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
2808 ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
2809 ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
2810 ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
2811 ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
2812 ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
7a229387
AB
2813
2814 r = (void *) &r6;
2815 } else { /* ipv4 */
6b96018b 2816 struct rtentry32 __user *ur4 = argp;
c6d409cf 2817 ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
7a229387 2818 3 * sizeof(struct sockaddr));
c6d409cf
ED
2819 ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
2820 ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
2821 ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
2822 ret |= __get_user(r4.rt_window, &(ur4->rt_window));
2823 ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
2824 ret |= __get_user(rtdev, &(ur4->rt_dev));
7a229387 2825 if (rtdev) {
c6d409cf 2826 ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
7a229387
AB
2827 r4.rt_dev = devname; devname[15] = 0;
2828 } else
2829 r4.rt_dev = NULL;
2830
2831 r = (void *) &r4;
2832 }
2833
2834 if (ret) {
2835 ret = -EFAULT;
2836 goto out;
2837 }
2838
c6d409cf 2839 set_fs(KERNEL_DS);
6b96018b 2840 ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
c6d409cf 2841 set_fs(old_fs);
7a229387
AB
2842
2843out:
7a229387
AB
2844 return ret;
2845}
2846
2847/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
2848 * for some operations; this forces use of the newer bridge-utils that
2849 * use compatiable ioctls
2850 */
6b96018b 2851static int old_bridge_ioctl(compat_ulong_t __user *argp)
7a229387 2852{
6b96018b 2853 compat_ulong_t tmp;
7a229387 2854
6b96018b 2855 if (get_user(tmp, argp))
7a229387
AB
2856 return -EFAULT;
2857 if (tmp == BRCTL_GET_VERSION)
2858 return BRCTL_VERSION + 1;
2859 return -EINVAL;
2860}
2861
6b96018b
AB
2862static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
2863 unsigned int cmd, unsigned long arg)
2864{
2865 void __user *argp = compat_ptr(arg);
2866 struct sock *sk = sock->sk;
2867 struct net *net = sock_net(sk);
7a229387 2868
6b96018b
AB
2869 if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15))
2870 return siocdevprivate_ioctl(net, cmd, argp);
2871
2872 switch (cmd) {
2873 case SIOCSIFBR:
2874 case SIOCGIFBR:
2875 return old_bridge_ioctl(argp);
2876 case SIOCGIFNAME:
2877 return dev_ifname32(net, argp);
2878 case SIOCGIFCONF:
2879 return dev_ifconf(net, argp);
2880 case SIOCETHTOOL:
2881 return ethtool_ioctl(net, argp);
7a50a240
AB
2882 case SIOCWANDEV:
2883 return compat_siocwandev(net, argp);
a2116ed2
AB
2884 case SIOCGIFMAP:
2885 case SIOCSIFMAP:
2886 return compat_sioc_ifmap(net, cmd, argp);
6b96018b
AB
2887 case SIOCBONDENSLAVE:
2888 case SIOCBONDRELEASE:
2889 case SIOCBONDSETHWADDR:
2890 case SIOCBONDSLAVEINFOQUERY:
2891 case SIOCBONDINFOQUERY:
2892 case SIOCBONDCHANGEACTIVE:
2893 return bond_ioctl(net, cmd, argp);
2894 case SIOCADDRT:
2895 case SIOCDELRT:
2896 return routing_ioctl(net, sock, cmd, argp);
2897 case SIOCGSTAMP:
2898 return do_siocgstamp(net, sock, cmd, argp);
2899 case SIOCGSTAMPNS:
2900 return do_siocgstampns(net, sock, cmd, argp);
a2116ed2
AB
2901 case SIOCSHWTSTAMP:
2902 return compat_siocshwtstamp(net, argp);
6b96018b
AB
2903
2904 case FIOSETOWN:
2905 case SIOCSPGRP:
2906 case FIOGETOWN:
2907 case SIOCGPGRP:
2908 case SIOCBRADDBR:
2909 case SIOCBRDELBR:
2910 case SIOCGIFVLAN:
2911 case SIOCSIFVLAN:
2912 case SIOCADDDLCI:
2913 case SIOCDELDLCI:
2914 return sock_ioctl(file, cmd, arg);
2915
2916 case SIOCGIFFLAGS:
2917 case SIOCSIFFLAGS:
2918 case SIOCGIFMETRIC:
2919 case SIOCSIFMETRIC:
2920 case SIOCGIFMTU:
2921 case SIOCSIFMTU:
2922 case SIOCGIFMEM:
2923 case SIOCSIFMEM:
2924 case SIOCGIFHWADDR:
2925 case SIOCSIFHWADDR:
2926 case SIOCADDMULTI:
2927 case SIOCDELMULTI:
2928 case SIOCGIFINDEX:
6b96018b
AB
2929 case SIOCGIFADDR:
2930 case SIOCSIFADDR:
2931 case SIOCSIFHWBROADCAST:
6b96018b 2932 case SIOCDIFADDR:
6b96018b
AB
2933 case SIOCGIFBRDADDR:
2934 case SIOCSIFBRDADDR:
2935 case SIOCGIFDSTADDR:
2936 case SIOCSIFDSTADDR:
2937 case SIOCGIFNETMASK:
2938 case SIOCSIFNETMASK:
2939 case SIOCSIFPFLAGS:
2940 case SIOCGIFPFLAGS:
2941 case SIOCGIFTXQLEN:
2942 case SIOCSIFTXQLEN:
2943 case SIOCBRADDIF:
2944 case SIOCBRDELIF:
9177efd3
AB
2945 case SIOCSIFNAME:
2946 case SIOCGMIIPHY:
2947 case SIOCGMIIREG:
2948 case SIOCSMIIREG:
6b96018b 2949 return dev_ifsioc(net, sock, cmd, argp);
9177efd3 2950
6b96018b
AB
2951 case SIOCSARP:
2952 case SIOCGARP:
2953 case SIOCDARP:
6b96018b 2954 case SIOCATMARK:
9177efd3
AB
2955 return sock_do_ioctl(net, sock, cmd, arg);
2956 }
2957
2958 /* Prevent warning from compat_sys_ioctl, these always
2959 * result in -EINVAL in the native case anyway. */
2960 switch (cmd) {
2961 case SIOCRTMSG:
2962 case SIOCGIFCOUNT:
6b96018b
AB
2963 case SIOCSRARP:
2964 case SIOCGRARP:
2965 case SIOCDRARP:
9177efd3
AB
2966 case SIOCSIFLINK:
2967 case SIOCGIFSLAVE:
2968 case SIOCSIFSLAVE:
2969 return -EINVAL;
6b96018b
AB
2970 }
2971
2972 return -ENOIOCTLCMD;
2973}
7a229387 2974
89bbfc95 2975static long compat_sock_ioctl(struct file *file, unsigned cmd,
89bddce5 2976 unsigned long arg)
89bbfc95
SP
2977{
2978 struct socket *sock = file->private_data;
2979 int ret = -ENOIOCTLCMD;
87de87d5
DM
2980 struct sock *sk;
2981 struct net *net;
2982
2983 sk = sock->sk;
2984 net = sock_net(sk);
89bbfc95
SP
2985
2986 if (sock->ops->compat_ioctl)
2987 ret = sock->ops->compat_ioctl(sock, cmd, arg);
2988
87de87d5
DM
2989 if (ret == -ENOIOCTLCMD &&
2990 (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST))
2991 ret = compat_wext_handle_ioctl(net, cmd, arg);
2992
6b96018b
AB
2993 if (ret == -ENOIOCTLCMD)
2994 ret = compat_sock_ioctl_trans(file, sock, cmd, arg);
2995
89bbfc95
SP
2996 return ret;
2997}
2998#endif
2999
ac5a488e
SS
3000int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
3001{
3002 return sock->ops->bind(sock, addr, addrlen);
3003}
c6d409cf 3004EXPORT_SYMBOL(kernel_bind);
ac5a488e
SS
3005
3006int kernel_listen(struct socket *sock, int backlog)
3007{
3008 return sock->ops->listen(sock, backlog);
3009}
c6d409cf 3010EXPORT_SYMBOL(kernel_listen);
ac5a488e
SS
3011
3012int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
3013{
3014 struct sock *sk = sock->sk;
3015 int err;
3016
3017 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
3018 newsock);
3019 if (err < 0)
3020 goto done;
3021
3022 err = sock->ops->accept(sock, *newsock, flags);
3023 if (err < 0) {
3024 sock_release(*newsock);
fa8705b0 3025 *newsock = NULL;
ac5a488e
SS
3026 goto done;
3027 }
3028
3029 (*newsock)->ops = sock->ops;
1b08534e 3030 __module_get((*newsock)->ops->owner);
ac5a488e
SS
3031
3032done:
3033 return err;
3034}
c6d409cf 3035EXPORT_SYMBOL(kernel_accept);
ac5a488e
SS
3036
3037int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
4768fbcb 3038 int flags)
ac5a488e
SS
3039{
3040 return sock->ops->connect(sock, addr, addrlen, flags);
3041}
c6d409cf 3042EXPORT_SYMBOL(kernel_connect);
ac5a488e
SS
3043
3044int kernel_getsockname(struct socket *sock, struct sockaddr *addr,
3045 int *addrlen)
3046{
3047 return sock->ops->getname(sock, addr, addrlen, 0);
3048}
c6d409cf 3049EXPORT_SYMBOL(kernel_getsockname);
ac5a488e
SS
3050
3051int kernel_getpeername(struct socket *sock, struct sockaddr *addr,
3052 int *addrlen)
3053{
3054 return sock->ops->getname(sock, addr, addrlen, 1);
3055}
c6d409cf 3056EXPORT_SYMBOL(kernel_getpeername);
ac5a488e
SS
3057
3058int kernel_getsockopt(struct socket *sock, int level, int optname,
3059 char *optval, int *optlen)
3060{
3061 mm_segment_t oldfs = get_fs();
3062 int err;
3063
3064 set_fs(KERNEL_DS);
3065 if (level == SOL_SOCKET)
3066 err = sock_getsockopt(sock, level, optname, optval, optlen);
3067 else
3068 err = sock->ops->getsockopt(sock, level, optname, optval,
3069 optlen);
3070 set_fs(oldfs);
3071 return err;
3072}
c6d409cf 3073EXPORT_SYMBOL(kernel_getsockopt);
ac5a488e
SS
3074
3075int kernel_setsockopt(struct socket *sock, int level, int optname,
b7058842 3076 char *optval, unsigned int optlen)
ac5a488e
SS
3077{
3078 mm_segment_t oldfs = get_fs();
3079 int err;
3080
3081 set_fs(KERNEL_DS);
3082 if (level == SOL_SOCKET)
3083 err = sock_setsockopt(sock, level, optname, optval, optlen);
3084 else
3085 err = sock->ops->setsockopt(sock, level, optname, optval,
3086 optlen);
3087 set_fs(oldfs);
3088 return err;
3089}
c6d409cf 3090EXPORT_SYMBOL(kernel_setsockopt);
ac5a488e
SS
3091
3092int kernel_sendpage(struct socket *sock, struct page *page, int offset,
3093 size_t size, int flags)
3094{
f8451725
HX
3095 sock_update_classid(sock->sk);
3096
ac5a488e
SS
3097 if (sock->ops->sendpage)
3098 return sock->ops->sendpage(sock, page, offset, size, flags);
3099
3100 return sock_no_sendpage(sock, page, offset, size, flags);
3101}
c6d409cf 3102EXPORT_SYMBOL(kernel_sendpage);
ac5a488e
SS
3103
3104int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg)
3105{
3106 mm_segment_t oldfs = get_fs();
3107 int err;
3108
3109 set_fs(KERNEL_DS);
3110 err = sock->ops->ioctl(sock, cmd, arg);
3111 set_fs(oldfs);
3112
3113 return err;
3114}
c6d409cf 3115EXPORT_SYMBOL(kernel_sock_ioctl);
ac5a488e 3116
91cf45f0
TM
3117int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
3118{
3119 return sock->ops->shutdown(sock, how);
3120}
91cf45f0 3121EXPORT_SYMBOL(kernel_sock_shutdown);