]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/unix/af_unix.c
[AF_UNIX]: Datagram getpeersec
[net-next-2.6.git] / net / unix / af_unix.c
CommitLineData
1da177e4
LT
1/*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
4 * Authors: Alan Cox, <alan.cox@linux.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12 *
13 * Fixes:
14 * Linus Torvalds : Assorted bug cures.
15 * Niibe Yutaka : async I/O support.
16 * Carsten Paeth : PF_UNIX check, address fixes.
17 * Alan Cox : Limit size of allocated blocks.
18 * Alan Cox : Fixed the stupid socketpair bug.
19 * Alan Cox : BSD compatibility fine tuning.
20 * Alan Cox : Fixed a bug in connect when interrupted.
21 * Alan Cox : Sorted out a proper draft version of
22 * file descriptor passing hacked up from
23 * Mike Shaver's work.
24 * Marty Leisner : Fixes to fd passing
25 * Nick Nevin : recvmsg bugfix.
26 * Alan Cox : Started proper garbage collector
27 * Heiko EiBfeldt : Missing verify_area check
28 * Alan Cox : Started POSIXisms
29 * Andreas Schwab : Replace inode by dentry for proper
30 * reference counting
31 * Kirk Petersen : Made this a module
32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
33 * Lots of bug fixes.
34 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
35 * by the above two patches.
36 * Andrea Arcangeli : If possible we block in connect(2)
37 * if the max backlog of the listen socket
38 * has been reached. This won't break
39 * old apps and it will avoid a huge number
40 * of socks hashed (this is for unix_gc()
41 * performance reasons).
42 * Security fix that limits the max
43 * number of socks to 2*max_files and
44 * the number of skb queueable in the
45 * dgram receiver.
46 * Artur Skawina : Hash function optimizations
47 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
48 * Malcolm Beattie : Set peercred for socketpair
49 * Michal Ostrowski : Module initialization cleanup.
50 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
51 * the core infrastructure is doing that
52 * for all net proto families now (2.5.69+)
53 *
54 *
55 * Known differences from reference BSD that was tested:
56 *
57 * [TO FIX]
58 * ECONNREFUSED is not returned from one end of a connected() socket to the
59 * other the moment one end closes.
60 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
61 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
62 * [NOT TO FIX]
63 * accept() returns a path name even if the connecting socket has closed
64 * in the meantime (BSD loses the path and gives up).
65 * accept() returns 0 length path for an unbound connector. BSD returns 16
66 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
68 * BSD af_unix apparently has connect forgetting to block properly.
69 * (need to check this with the POSIX spec in detail)
70 *
71 * Differences from 2.0.0-11-... (ANK)
72 * Bug fixes and improvements.
73 * - client shutdown killed server socket.
74 * - removed all useless cli/sti pairs.
75 *
76 * Semantic changes/extensions.
77 * - generic control message passing.
78 * - SCM_CREDENTIALS control message.
79 * - "Abstract" (not FS based) socket bindings.
80 * Abstract names are sequences of bytes (not zero terminated)
81 * started by 0, so that this name space does not intersect
82 * with BSD names.
83 */
84
85#include <linux/module.h>
86#include <linux/config.h>
87#include <linux/kernel.h>
1da177e4
LT
88#include <linux/signal.h>
89#include <linux/sched.h>
90#include <linux/errno.h>
91#include <linux/string.h>
92#include <linux/stat.h>
93#include <linux/dcache.h>
94#include <linux/namei.h>
95#include <linux/socket.h>
96#include <linux/un.h>
97#include <linux/fcntl.h>
98#include <linux/termios.h>
99#include <linux/sockios.h>
100#include <linux/net.h>
101#include <linux/in.h>
102#include <linux/fs.h>
103#include <linux/slab.h>
104#include <asm/uaccess.h>
105#include <linux/skbuff.h>
106#include <linux/netdevice.h>
107#include <net/sock.h>
c752f073 108#include <net/tcp_states.h>
1da177e4
LT
109#include <net/af_unix.h>
110#include <linux/proc_fs.h>
111#include <linux/seq_file.h>
112#include <net/scm.h>
113#include <linux/init.h>
114#include <linux/poll.h>
115#include <linux/smp_lock.h>
116#include <linux/rtnetlink.h>
117#include <linux/mount.h>
118#include <net/checksum.h>
119#include <linux/security.h>
120
121int sysctl_unix_max_dgram_qlen = 10;
122
123struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
fbe9cc4a 124DEFINE_SPINLOCK(unix_table_lock);
1da177e4
LT
125static atomic_t unix_nr_socks = ATOMIC_INIT(0);
126
127#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
128
129#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
130
877ce7c1
CZ
#ifdef CONFIG_SECURITY_NETWORK
/*
 * Ask the security module for the peer security data of a datagram and
 * record it in the skb.  If the hook reports an error the stored
 * security data pointer is reset to NULL.
 */
static void unix_get_peersec_dgram(struct sk_buff *skb)
{
	if (security_socket_getpeersec_dgram(skb, UNIXSECDATA(skb),
					     UNIXSECLEN(skb)) != 0)
		*(UNIXSECDATA(skb)) = NULL;
}

/* Copy the security data pointer and length kept in the skb into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->seclen = *UNIXSECLEN(skb);
	scm->secdata = *UNIXSECDATA(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers are no-ops. */
static void unix_get_peersec_dgram(struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
154
1da177e4
LT
155/*
156 * SMP locking strategy:
fbe9cc4a 157 * hash table is protected with spinlock unix_table_lock
1da177e4
LT
158 * each socket state is protected by separate rwlock.
159 */
160
161static inline unsigned unix_hash_fold(unsigned hash)
162{
163 hash ^= hash>>16;
164 hash ^= hash>>8;
165 return hash&(UNIX_HASH_SIZE-1);
166}
167
168#define unix_peer(sk) (unix_sk(sk)->peer)
169
170static inline int unix_our_peer(struct sock *sk, struct sock *osk)
171{
172 return unix_peer(osk) == sk;
173}
174
175static inline int unix_may_send(struct sock *sk, struct sock *osk)
176{
177 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
178}
179
180static struct sock *unix_peer_get(struct sock *s)
181{
182 struct sock *peer;
183
184 unix_state_rlock(s);
185 peer = unix_peer(s);
186 if (peer)
187 sock_hold(peer);
188 unix_state_runlock(s);
189 return peer;
190}
191
192static inline void unix_release_addr(struct unix_address *addr)
193{
194 if (atomic_dec_and_test(&addr->refcnt))
195 kfree(addr);
196}
197
198/*
199 * Check unix socket name:
200 * - should be not zero length.
201 * - if started by not zero, should be NULL terminated (FS object)
202 * - if started by zero, it is abstract name.
203 */
204
205static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
206{
207 if (len <= sizeof(short) || len > sizeof(*sunaddr))
208 return -EINVAL;
209 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
210 return -EINVAL;
211 if (sunaddr->sun_path[0]) {
212 /*
213 * This may look like an off by one error but it is a bit more
214 * subtle. 108 is the longest valid AF_UNIX path for a binding.
215 * sun_path[108] doesnt as such exist. However in kernel space
216 * we are guaranteed that it is a valid memory location in our
217 * kernel address buffer.
218 */
219 ((char *)sunaddr)[len]=0;
220 len = strlen(sunaddr->sun_path)+1+sizeof(short);
221 return len;
222 }
223
224 *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
225 return len;
226}
227
/*
 * Take @sk off whatever hash chain it is on.  The callers in this file
 * hold unix_table_lock around this.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
232
/*
 * Link @sk into the chain @list.  The socket must not be hashed already;
 * BUG_TRAP complains if it is.  The callers in this file hold
 * unix_table_lock around this.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));
	sk_add_node(sk, list);
}
238
239static inline void unix_remove_socket(struct sock *sk)
240{
fbe9cc4a 241 spin_lock(&unix_table_lock);
1da177e4 242 __unix_remove_socket(sk);
fbe9cc4a 243 spin_unlock(&unix_table_lock);
1da177e4
LT
244}
245
246static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
247{
fbe9cc4a 248 spin_lock(&unix_table_lock);
1da177e4 249 __unix_insert_socket(list, sk);
fbe9cc4a 250 spin_unlock(&unix_table_lock);
1da177e4
LT
251}
252
253static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
254 int len, int type, unsigned hash)
255{
256 struct sock *s;
257 struct hlist_node *node;
258
259 sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
260 struct unix_sock *u = unix_sk(s);
261
262 if (u->addr->len == len &&
263 !memcmp(u->addr->name, sunname, len))
264 goto found;
265 }
266 s = NULL;
267found:
268 return s;
269}
270
271static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
272 int len, int type,
273 unsigned hash)
274{
275 struct sock *s;
276
fbe9cc4a 277 spin_lock(&unix_table_lock);
1da177e4
LT
278 s = __unix_find_socket_byname(sunname, len, type, hash);
279 if (s)
280 sock_hold(s);
fbe9cc4a 281 spin_unlock(&unix_table_lock);
1da177e4
LT
282 return s;
283}
284
285static struct sock *unix_find_socket_byinode(struct inode *i)
286{
287 struct sock *s;
288 struct hlist_node *node;
289
fbe9cc4a 290 spin_lock(&unix_table_lock);
1da177e4
LT
291 sk_for_each(s, node,
292 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
293 struct dentry *dentry = unix_sk(s)->dentry;
294
295 if(dentry && dentry->d_inode == i)
296 {
297 sock_hold(s);
298 goto found;
299 }
300 }
301 s = NULL;
302found:
fbe9cc4a 303 spin_unlock(&unix_table_lock);
1da177e4
LT
304 return s;
305}
306
307static inline int unix_writable(struct sock *sk)
308{
309 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
310}
311
312static void unix_write_space(struct sock *sk)
313{
314 read_lock(&sk->sk_callback_lock);
315 if (unix_writable(sk)) {
316 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
317 wake_up_interruptible(sk->sk_sleep);
318 sk_wake_async(sk, 2, POLL_OUT);
319 }
320 read_unlock(&sk->sk_callback_lock);
321}
322
323/* When dgram socket disconnects (or changes its peer), we clear its receive
324 * queue of packets arrived from previous peer. First, it allows to do
325 * flow control based only on wmem_alloc; second, sk connected to peer
326 * may receive messages only from that peer. */
327static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
328{
b03efcfb 329 if (!skb_queue_empty(&sk->sk_receive_queue)) {
1da177e4
LT
330 skb_queue_purge(&sk->sk_receive_queue);
331 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
332
333 /* If one link of bidirectional dgram pipe is disconnected,
334 * we signal error. Messages are lost. Do not make this,
335 * when peer was not connected to us.
336 */
337 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
338 other->sk_err = ECONNRESET;
339 other->sk_error_report(other);
340 }
341 }
342}
343
344static void unix_sock_destructor(struct sock *sk)
345{
346 struct unix_sock *u = unix_sk(sk);
347
348 skb_queue_purge(&sk->sk_receive_queue);
349
350 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
351 BUG_TRAP(sk_unhashed(sk));
352 BUG_TRAP(!sk->sk_socket);
353 if (!sock_flag(sk, SOCK_DEAD)) {
354 printk("Attempt to release alive unix socket: %p\n", sk);
355 return;
356 }
357
358 if (u->addr)
359 unix_release_addr(u->addr);
360
361 atomic_dec(&unix_nr_socks);
362#ifdef UNIX_REFCNT_DEBUG
363 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
364#endif
365}
366
367static int unix_release_sock (struct sock *sk, int embrion)
368{
369 struct unix_sock *u = unix_sk(sk);
370 struct dentry *dentry;
371 struct vfsmount *mnt;
372 struct sock *skpair;
373 struct sk_buff *skb;
374 int state;
375
376 unix_remove_socket(sk);
377
378 /* Clear state */
379 unix_state_wlock(sk);
380 sock_orphan(sk);
381 sk->sk_shutdown = SHUTDOWN_MASK;
382 dentry = u->dentry;
383 u->dentry = NULL;
384 mnt = u->mnt;
385 u->mnt = NULL;
386 state = sk->sk_state;
387 sk->sk_state = TCP_CLOSE;
388 unix_state_wunlock(sk);
389
390 wake_up_interruptible_all(&u->peer_wait);
391
392 skpair=unix_peer(sk);
393
394 if (skpair!=NULL) {
395 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
396 unix_state_wlock(skpair);
397 /* No more writes */
398 skpair->sk_shutdown = SHUTDOWN_MASK;
399 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
400 skpair->sk_err = ECONNRESET;
401 unix_state_wunlock(skpair);
402 skpair->sk_state_change(skpair);
403 read_lock(&skpair->sk_callback_lock);
404 sk_wake_async(skpair,1,POLL_HUP);
405 read_unlock(&skpair->sk_callback_lock);
406 }
407 sock_put(skpair); /* It may now die */
408 unix_peer(sk) = NULL;
409 }
410
411 /* Try to flush out this socket. Throw out buffers at least */
412
413 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
414 if (state==TCP_LISTEN)
415 unix_release_sock(skb->sk, 1);
416 /* passed fds are erased in the kfree_skb hook */
417 kfree_skb(skb);
418 }
419
420 if (dentry) {
421 dput(dentry);
422 mntput(mnt);
423 }
424
425 sock_put(sk);
426
427 /* ---- Socket is dead now and most probably destroyed ---- */
428
429 /*
430 * Fixme: BSD difference: In BSD all sockets connected to use get
431 * ECONNRESET and we die on the spot. In Linux we behave
432 * like files and pipes do and wait for the last
433 * dereference.
434 *
435 * Can't we simply set sock->err?
436 *
437 * What the above comment does talk about? --ANK(980817)
438 */
439
440 if (atomic_read(&unix_tot_inflight))
441 unix_gc(); /* Garbage collect fds */
442
443 return 0;
444}
445
446static int unix_listen(struct socket *sock, int backlog)
447{
448 int err;
449 struct sock *sk = sock->sk;
450 struct unix_sock *u = unix_sk(sk);
451
452 err = -EOPNOTSUPP;
453 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
454 goto out; /* Only stream/seqpacket sockets accept */
455 err = -EINVAL;
456 if (!u->addr)
457 goto out; /* No listens on an unbound socket */
458 unix_state_wlock(sk);
459 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
460 goto out_unlock;
461 if (backlog > sk->sk_max_ack_backlog)
462 wake_up_interruptible_all(&u->peer_wait);
463 sk->sk_max_ack_backlog = backlog;
464 sk->sk_state = TCP_LISTEN;
465 /* set credentials so connect can copy them */
466 sk->sk_peercred.pid = current->tgid;
467 sk->sk_peercred.uid = current->euid;
468 sk->sk_peercred.gid = current->egid;
469 err = 0;
470
471out_unlock:
472 unix_state_wunlock(sk);
473out:
474 return err;
475}
476
477static int unix_release(struct socket *);
478static int unix_bind(struct socket *, struct sockaddr *, int);
479static int unix_stream_connect(struct socket *, struct sockaddr *,
480 int addr_len, int flags);
481static int unix_socketpair(struct socket *, struct socket *);
482static int unix_accept(struct socket *, struct socket *, int);
483static int unix_getname(struct socket *, struct sockaddr *, int *, int);
484static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
485static int unix_ioctl(struct socket *, unsigned int, unsigned long);
486static int unix_shutdown(struct socket *, int);
487static int unix_stream_sendmsg(struct kiocb *, struct socket *,
488 struct msghdr *, size_t);
489static int unix_stream_recvmsg(struct kiocb *, struct socket *,
490 struct msghdr *, size_t, int);
491static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
492 struct msghdr *, size_t);
493static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
494 struct msghdr *, size_t, int);
495static int unix_dgram_connect(struct socket *, struct sockaddr *,
496 int, int);
497static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
498 struct msghdr *, size_t);
499
90ddc4f0 500static const struct proto_ops unix_stream_ops = {
1da177e4
LT
501 .family = PF_UNIX,
502 .owner = THIS_MODULE,
503 .release = unix_release,
504 .bind = unix_bind,
505 .connect = unix_stream_connect,
506 .socketpair = unix_socketpair,
507 .accept = unix_accept,
508 .getname = unix_getname,
509 .poll = unix_poll,
510 .ioctl = unix_ioctl,
511 .listen = unix_listen,
512 .shutdown = unix_shutdown,
513 .setsockopt = sock_no_setsockopt,
514 .getsockopt = sock_no_getsockopt,
515 .sendmsg = unix_stream_sendmsg,
516 .recvmsg = unix_stream_recvmsg,
517 .mmap = sock_no_mmap,
518 .sendpage = sock_no_sendpage,
519};
520
90ddc4f0 521static const struct proto_ops unix_dgram_ops = {
1da177e4
LT
522 .family = PF_UNIX,
523 .owner = THIS_MODULE,
524 .release = unix_release,
525 .bind = unix_bind,
526 .connect = unix_dgram_connect,
527 .socketpair = unix_socketpair,
528 .accept = sock_no_accept,
529 .getname = unix_getname,
530 .poll = datagram_poll,
531 .ioctl = unix_ioctl,
532 .listen = sock_no_listen,
533 .shutdown = unix_shutdown,
534 .setsockopt = sock_no_setsockopt,
535 .getsockopt = sock_no_getsockopt,
536 .sendmsg = unix_dgram_sendmsg,
537 .recvmsg = unix_dgram_recvmsg,
538 .mmap = sock_no_mmap,
539 .sendpage = sock_no_sendpage,
540};
541
90ddc4f0 542static const struct proto_ops unix_seqpacket_ops = {
1da177e4
LT
543 .family = PF_UNIX,
544 .owner = THIS_MODULE,
545 .release = unix_release,
546 .bind = unix_bind,
547 .connect = unix_stream_connect,
548 .socketpair = unix_socketpair,
549 .accept = unix_accept,
550 .getname = unix_getname,
551 .poll = datagram_poll,
552 .ioctl = unix_ioctl,
553 .listen = unix_listen,
554 .shutdown = unix_shutdown,
555 .setsockopt = sock_no_setsockopt,
556 .getsockopt = sock_no_getsockopt,
557 .sendmsg = unix_seqpacket_sendmsg,
558 .recvmsg = unix_dgram_recvmsg,
559 .mmap = sock_no_mmap,
560 .sendpage = sock_no_sendpage,
561};
562
563static struct proto unix_proto = {
564 .name = "UNIX",
565 .owner = THIS_MODULE,
566 .obj_size = sizeof(struct unix_sock),
567};
568
569static struct sock * unix_create1(struct socket *sock)
570{
571 struct sock *sk = NULL;
572 struct unix_sock *u;
573
529bf6be 574 if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
1da177e4
LT
575 goto out;
576
577 sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
578 if (!sk)
579 goto out;
580
581 atomic_inc(&unix_nr_socks);
582
583 sock_init_data(sock,sk);
584
585 sk->sk_write_space = unix_write_space;
586 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
587 sk->sk_destruct = unix_sock_destructor;
588 u = unix_sk(sk);
589 u->dentry = NULL;
590 u->mnt = NULL;
fd19f329 591 spin_lock_init(&u->lock);
1da177e4 592 atomic_set(&u->inflight, sock ? 0 : -1);
57b47a53 593 mutex_init(&u->readlock); /* single task reading lock */
1da177e4
LT
594 init_waitqueue_head(&u->peer_wait);
595 unix_insert_socket(unix_sockets_unbound, sk);
596out:
597 return sk;
598}
599
600static int unix_create(struct socket *sock, int protocol)
601{
602 if (protocol && protocol != PF_UNIX)
603 return -EPROTONOSUPPORT;
604
605 sock->state = SS_UNCONNECTED;
606
607 switch (sock->type) {
608 case SOCK_STREAM:
609 sock->ops = &unix_stream_ops;
610 break;
611 /*
612 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
613 * nothing uses it.
614 */
615 case SOCK_RAW:
616 sock->type=SOCK_DGRAM;
617 case SOCK_DGRAM:
618 sock->ops = &unix_dgram_ops;
619 break;
620 case SOCK_SEQPACKET:
621 sock->ops = &unix_seqpacket_ops;
622 break;
623 default:
624 return -ESOCKTNOSUPPORT;
625 }
626
627 return unix_create1(sock) ? 0 : -ENOMEM;
628}
629
630static int unix_release(struct socket *sock)
631{
632 struct sock *sk = sock->sk;
633
634 if (!sk)
635 return 0;
636
637 sock->sk = NULL;
638
639 return unix_release_sock (sk, 0);
640}
641
642static int unix_autobind(struct socket *sock)
643{
644 struct sock *sk = sock->sk;
645 struct unix_sock *u = unix_sk(sk);
646 static u32 ordernum = 1;
647 struct unix_address * addr;
648 int err;
649
57b47a53 650 mutex_lock(&u->readlock);
1da177e4
LT
651
652 err = 0;
653 if (u->addr)
654 goto out;
655
656 err = -ENOMEM;
657 addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
658 if (!addr)
659 goto out;
660
661 memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
662 addr->name->sun_family = AF_UNIX;
663 atomic_set(&addr->refcnt, 1);
664
665retry:
666 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
667 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
668
fbe9cc4a 669 spin_lock(&unix_table_lock);
1da177e4
LT
670 ordernum = (ordernum+1)&0xFFFFF;
671
672 if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
673 addr->hash)) {
fbe9cc4a 674 spin_unlock(&unix_table_lock);
1da177e4
LT
675 /* Sanity yield. It is unusual case, but yet... */
676 if (!(ordernum&0xFF))
677 yield();
678 goto retry;
679 }
680 addr->hash ^= sk->sk_type;
681
682 __unix_remove_socket(sk);
683 u->addr = addr;
684 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 685 spin_unlock(&unix_table_lock);
1da177e4
LT
686 err = 0;
687
57b47a53 688out: mutex_unlock(&u->readlock);
1da177e4
LT
689 return err;
690}
691
/*
 * Look up the socket addressed by @sunname and return it with a
 * reference held.
 *
 * A name starting with a non-zero byte is resolved through the
 * filesystem: the path must be writable by the caller and refer to a
 * socket inode of the requested @type.  Any other name is looked up in
 * the hash table with @hash.
 *
 * On failure NULL is returned and *@error receives the error code
 * (the lookup or permission error, -ECONNREFUSED or -EPROTOTYPE).
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *peer;
	struct nameidata nd;
	int err = 0;

	if (!sunname->sun_path[0]) {
		struct dentry *dentry;

		peer = unix_find_socket_byname(sunname, len, type, hash);
		if (!peer) {
			err = -ECONNREFUSED;
			goto fail;
		}
		dentry = unix_sk(peer)->dentry;
		if (dentry)
			touch_atime(unix_sk(peer)->mnt, dentry);
		return peer;
	}

	err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
	if (err)
		goto fail;

	err = vfs_permission(&nd, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
		goto put_fail;

	peer = unix_find_socket_byinode(nd.dentry->d_inode);
	if (!peer)
		goto put_fail;

	/* The access time is only touched for a socket of the right type. */
	if (peer->sk_type == type)
		touch_atime(nd.mnt, nd.dentry);

	path_release(&nd);

	if (peer->sk_type != type) {
		sock_put(peer);
		err = -EPROTOTYPE;
		goto fail;
	}
	return peer;

put_fail:
	path_release(&nd);
fail:
	*error = err;
	return NULL;
}
743
744
745static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
746{
747 struct sock *sk = sock->sk;
748 struct unix_sock *u = unix_sk(sk);
749 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
750 struct dentry * dentry = NULL;
751 struct nameidata nd;
752 int err;
753 unsigned hash;
754 struct unix_address *addr;
755 struct hlist_head *list;
756
757 err = -EINVAL;
758 if (sunaddr->sun_family != AF_UNIX)
759 goto out;
760
761 if (addr_len==sizeof(short)) {
762 err = unix_autobind(sock);
763 goto out;
764 }
765
766 err = unix_mkname(sunaddr, addr_len, &hash);
767 if (err < 0)
768 goto out;
769 addr_len = err;
770
57b47a53 771 mutex_lock(&u->readlock);
1da177e4
LT
772
773 err = -EINVAL;
774 if (u->addr)
775 goto out_up;
776
777 err = -ENOMEM;
778 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
779 if (!addr)
780 goto out_up;
781
782 memcpy(addr->name, sunaddr, addr_len);
783 addr->len = addr_len;
784 addr->hash = hash ^ sk->sk_type;
785 atomic_set(&addr->refcnt, 1);
786
787 if (sunaddr->sun_path[0]) {
788 unsigned int mode;
789 err = 0;
790 /*
791 * Get the parent directory, calculate the hash for last
792 * component.
793 */
794 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
795 if (err)
796 goto out_mknod_parent;
f81a0bff
CH
797
798 dentry = lookup_create(&nd, 0);
1da177e4
LT
799 err = PTR_ERR(dentry);
800 if (IS_ERR(dentry))
801 goto out_mknod_unlock;
f81a0bff 802
1da177e4
LT
803 /*
804 * All right, let's create it.
805 */
806 mode = S_IFSOCK |
807 (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
808 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
809 if (err)
810 goto out_mknod_dput;
1b1dcc1b 811 mutex_unlock(&nd.dentry->d_inode->i_mutex);
1da177e4
LT
812 dput(nd.dentry);
813 nd.dentry = dentry;
814
815 addr->hash = UNIX_HASH_SIZE;
816 }
817
fbe9cc4a 818 spin_lock(&unix_table_lock);
1da177e4
LT
819
820 if (!sunaddr->sun_path[0]) {
821 err = -EADDRINUSE;
822 if (__unix_find_socket_byname(sunaddr, addr_len,
823 sk->sk_type, hash)) {
824 unix_release_addr(addr);
825 goto out_unlock;
826 }
827
828 list = &unix_socket_table[addr->hash];
829 } else {
830 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
831 u->dentry = nd.dentry;
832 u->mnt = nd.mnt;
833 }
834
835 err = 0;
836 __unix_remove_socket(sk);
837 u->addr = addr;
838 __unix_insert_socket(list, sk);
839
840out_unlock:
fbe9cc4a 841 spin_unlock(&unix_table_lock);
1da177e4 842out_up:
57b47a53 843 mutex_unlock(&u->readlock);
1da177e4
LT
844out:
845 return err;
846
847out_mknod_dput:
848 dput(dentry);
849out_mknod_unlock:
1b1dcc1b 850 mutex_unlock(&nd.dentry->d_inode->i_mutex);
1da177e4
LT
851 path_release(&nd);
852out_mknod_parent:
853 if (err==-EEXIST)
854 err=-EADDRINUSE;
855 unix_release_addr(addr);
856 goto out_up;
857}
858
859static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
860 int alen, int flags)
861{
862 struct sock *sk = sock->sk;
863 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
864 struct sock *other;
865 unsigned hash;
866 int err;
867
868 if (addr->sa_family != AF_UNSPEC) {
869 err = unix_mkname(sunaddr, alen, &hash);
870 if (err < 0)
871 goto out;
872 alen = err;
873
874 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
875 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
876 goto out;
877
878 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
879 if (!other)
880 goto out;
881
882 unix_state_wlock(sk);
883
884 err = -EPERM;
885 if (!unix_may_send(sk, other))
886 goto out_unlock;
887
888 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
889 if (err)
890 goto out_unlock;
891
892 } else {
893 /*
894 * 1003.1g breaking connected state with AF_UNSPEC
895 */
896 other = NULL;
897 unix_state_wlock(sk);
898 }
899
900 /*
901 * If it was connected, reconnect.
902 */
903 if (unix_peer(sk)) {
904 struct sock *old_peer = unix_peer(sk);
905 unix_peer(sk)=other;
906 unix_state_wunlock(sk);
907
908 if (other != old_peer)
909 unix_dgram_disconnected(sk, old_peer);
910 sock_put(old_peer);
911 } else {
912 unix_peer(sk)=other;
913 unix_state_wunlock(sk);
914 }
915 return 0;
916
917out_unlock:
918 unix_state_wunlock(sk);
919 sock_put(other);
920out:
921 return err;
922}
923
924static long unix_wait_for_peer(struct sock *other, long timeo)
925{
926 struct unix_sock *u = unix_sk(other);
927 int sched;
928 DEFINE_WAIT(wait);
929
930 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
931
932 sched = !sock_flag(other, SOCK_DEAD) &&
933 !(other->sk_shutdown & RCV_SHUTDOWN) &&
934 (skb_queue_len(&other->sk_receive_queue) >
935 other->sk_max_ack_backlog);
936
937 unix_state_runlock(other);
938
939 if (sched)
940 timeo = schedule_timeout(timeo);
941
942 finish_wait(&u->peer_wait, &wait);
943 return timeo;
944}
945
/*
 * connect() for stream and seqpacket sockets.
 *
 * A fresh sock (newsk) is created to be the server-side end of the
 * connection.  It is linked with @sock's sock and then queued, through
 * a one byte skb, on the receive queue of the listening socket, where
 * unix_accept() picks it up.
 *
 * Returns 0 on success.  Errors set here: -ENOMEM, -ECONNREFUSED (target
 * is not listening), -EAGAIN (backlog full and no timeout left),
 * -EISCONN, -EINVAL (socket is neither closed nor established), the
 * sock_intr_errno() value when a signal interrupts the wait, plus
 * whatever unix_mkname(), unix_autobind(), unix_find_other() or the
 * security hook return.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
		err = unix_autobind(sock);
		if (err != 0)
			goto out;
	}

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops the read lock taken on other above. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	if (st == TCP_ESTABLISHED) {
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	}
	if (st != TCP_CLOSE) {
		err = -EINVAL;
		goto out_unlock;
	}
	/* TCP_CLOSE: this is ok... continue with connect */

	unix_state_wlock(sk);

	if (sk->sk_state != st) {
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_wunlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= current->tgid;
	newsk->sk_peercred.uid	= current->euid;
	newsk->sk_peercred.gid	= current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock */
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_wunlock(sk);

	/* take ten and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	/* Undo artificially decreased inflight after embrion
	 * is installed to listening socket. */
	atomic_inc(&newu->inflight);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_runlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1124
1125static int unix_socketpair(struct socket *socka, struct socket *sockb)
1126{
1127 struct sock *ska=socka->sk, *skb = sockb->sk;
1128
1129 /* Join our sockets back to back */
1130 sock_hold(ska);
1131 sock_hold(skb);
1132 unix_peer(ska)=skb;
1133 unix_peer(skb)=ska;
1134 ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1135 ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1136 ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1137
1138 if (ska->sk_type != SOCK_DGRAM) {
1139 ska->sk_state = TCP_ESTABLISHED;
1140 skb->sk_state = TCP_ESTABLISHED;
1141 socka->state = SS_CONNECTED;
1142 sockb->state = SS_CONNECTED;
1143 }
1144 return 0;
1145}
1146
1147static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1148{
1149 struct sock *sk = sock->sk;
1150 struct sock *tsk;
1151 struct sk_buff *skb;
1152 int err;
1153
1154 err = -EOPNOTSUPP;
1155 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1156 goto out;
1157
1158 err = -EINVAL;
1159 if (sk->sk_state != TCP_LISTEN)
1160 goto out;
1161
1162 /* If socket state is TCP_LISTEN it cannot change (for now...),
1163 * so that no locks are necessary.
1164 */
1165
1166 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1167 if (!skb) {
1168 /* This means receive shutdown. */
1169 if (err == 0)
1170 err = -EINVAL;
1171 goto out;
1172 }
1173
1174 tsk = skb->sk;
1175 skb_free_datagram(sk, skb);
1176 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1177
1178 /* attach accepted sock to socket */
1179 unix_state_wlock(tsk);
1180 newsock->state = SS_CONNECTED;
1181 sock_graft(tsk, newsock);
1182 unix_state_wunlock(tsk);
1183 return 0;
1184
1185out:
1186 return err;
1187}
1188
1189
1190static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1191{
1192 struct sock *sk = sock->sk;
1193 struct unix_sock *u;
1194 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1195 int err = 0;
1196
1197 if (peer) {
1198 sk = unix_peer_get(sk);
1199
1200 err = -ENOTCONN;
1201 if (!sk)
1202 goto out;
1203 err = 0;
1204 } else {
1205 sock_hold(sk);
1206 }
1207
1208 u = unix_sk(sk);
1209 unix_state_rlock(sk);
1210 if (!u->addr) {
1211 sunaddr->sun_family = AF_UNIX;
1212 sunaddr->sun_path[0] = 0;
1213 *uaddr_len = sizeof(short);
1214 } else {
1215 struct unix_address *addr = u->addr;
1216
1217 *uaddr_len = addr->len;
1218 memcpy(sunaddr, addr->name, *uaddr_len);
1219 }
1220 unix_state_runlock(sk);
1221 sock_put(sk);
1222out:
1223 return err;
1224}
1225
1226static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1227{
1228 int i;
1229
1230 scm->fp = UNIXCB(skb).fp;
1231 skb->destructor = sock_wfree;
1232 UNIXCB(skb).fp = NULL;
1233
1234 for (i=scm->fp->count-1; i>=0; i--)
1235 unix_notinflight(scm->fp->fp[i]);
1236}
1237
1238static void unix_destruct_fds(struct sk_buff *skb)
1239{
1240 struct scm_cookie scm;
1241 memset(&scm, 0, sizeof(scm));
1242 unix_detach_fds(&scm, skb);
1243
1244 /* Alas, it calls VFS */
1245 /* So fscking what? fput() had been SMP-safe since the last Summer */
1246 scm_destroy(&scm);
1247 sock_wfree(skb);
1248}
1249
1250static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1251{
1252 int i;
1253 for (i=scm->fp->count-1; i>=0; i--)
1254 unix_inflight(scm->fp->fp[i]);
1255 UNIXCB(skb).fp = scm->fp;
1256 skb->destructor = unix_destruct_fds;
1257 scm->fp = NULL;
1258}
1259
1260/*
1261 * Send AF_UNIX data.
1262 */
1263
/*
 * Send one datagram.
 *
 * The destination is msg->msg_name when msg->msg_namelen is non-zero,
 * otherwise the connected peer (-ENOTCONN when there is none).  The
 * whole message is placed in a single skb and queued on the receiver.
 * Returns len on success or a negative errno; MSG_OOB is rejected with
 * -EOPNOTSUPP.  The scm cookie is destroyed on every path after
 * scm_send() succeeded.
 */
1264static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1265 struct msghdr *msg, size_t len)
1266{
1267 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1268 struct sock *sk = sock->sk;
1269 struct unix_sock *u = unix_sk(sk);
1270 struct sockaddr_un *sunaddr=msg->msg_name;
1271 struct sock *other = NULL;
1272 int namelen = 0; /* fake GCC */
1273 int err;
1274 unsigned hash;
1275 struct sk_buff *skb;
1276 long timeo;
1277 struct scm_cookie tmp_scm;
1278
/* Fall back to an on-stack cookie when the caller supplied none. */
1279 if (NULL == siocb->scm)
1280 siocb->scm = &tmp_scm;
1281 err = scm_send(sock, msg, siocb->scm);
1282 if (err < 0)
1283 return err;
1284
1285 err = -EOPNOTSUPP;
1286 if (msg->msg_flags&MSG_OOB)
1287 goto out;
1288
/* Explicit destination: validate it now, look it up later (restart).
 * No destination: take a reference on the connected peer. */
1289 if (msg->msg_namelen) {
1290 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1291 if (err < 0)
1292 goto out;
1293 namelen = err;
1294 } else {
1295 sunaddr = NULL;
1296 err = -ENOTCONN;
1297 other = unix_peer_get(sk);
1298 if (!other)
1299 goto out;
1300 }
1301
/* With SOCK_PASSCRED set and no address bound yet, autobind first. */
1302 if (test_bit(SOCK_PASSCRED, &sock->flags)
1303 && !u->addr && (err = unix_autobind(sock)) != 0)
1304 goto out;
1305
1306 err = -EMSGSIZE;
1307 if (len > sk->sk_sndbuf - 32)
1308 goto out;
1309
1310 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1311 if (skb==NULL)
1312 goto out;
1313
/* Stamp the sender's credentials and any passed files on the skb. */
1314 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1315 if (siocb->scm->fp)
1316 unix_attach_fds(siocb->scm, skb);
1317
877ce7c1
CZ
1318 unix_get_peersec_dgram(skb);
1319
1da177e4
LT
1320 skb->h.raw = skb->data;
1321 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1322 if (err)
1323 goto out_free;
1324
1325 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1326
/* Re-entered whenever the destination has to be resolved or re-checked. */
1327restart:
1328 if (!other) {
1329 err = -ECONNRESET;
1330 if (sunaddr == NULL)
1331 goto out_free;
1332
1333 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1334 hash, &err);
1335 if (other==NULL)
1336 goto out_free;
1337 }
1338
1339 unix_state_rlock(other);
1340 err = -EPERM;
1341 if (!unix_may_send(sk, other))
1342 goto out_unlock;
1343
/* Destination is dead: drop it.  If it was our connected peer, disconnect
 * and fail with -ECONNREFUSED (the second sock_put() releases the peer
 * reference); otherwise retry, which looks the address up again. */
1344 if (sock_flag(other, SOCK_DEAD)) {
1345 /*
1346 * Check with 1003.1g - what should
1347 * datagram error
1348 */
1349 unix_state_runlock(other);
1350 sock_put(other);
1351
1352 err = 0;
1353 unix_state_wlock(sk);
1354 if (unix_peer(sk) == other) {
1355 unix_peer(sk)=NULL;
1356 unix_state_wunlock(sk);
1357
1358 unix_dgram_disconnected(sk, other);
1359 sock_put(other);
1360 err = -ECONNREFUSED;
1361 } else {
1362 unix_state_wunlock(sk);
1363 }
1364
1365 other = NULL;
1366 if (err)
1367 goto out_free;
1368 goto restart;
1369 }
1370
1371 err = -EPIPE;
1372 if (other->sk_shutdown & RCV_SHUTDOWN)
1373 goto out_unlock;
1374
1375 if (sk->sk_type != SOCK_SEQPACKET) {
1376 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1377 if (err)
1378 goto out_unlock;
1379 }
1380
/* Receiver is not connected back to us and its queue exceeds the backlog:
 * -EAGAIN when no time is left, otherwise wait and retry.
 * NOTE(review): the paths after unix_wait_for_peer() bypass out_unlock, so
 * that helper presumably releases the state lock on 'other' - confirm. */
1381 if (unix_peer(other) != sk &&
1382 (skb_queue_len(&other->sk_receive_queue) >
1383 other->sk_max_ack_backlog)) {
1384 if (!timeo) {
1385 err = -EAGAIN;
1386 goto out_unlock;
1387 }
1388
1389 timeo = unix_wait_for_peer(other, timeo);
1390
1391 err = sock_intr_errno(timeo);
1392 if (signal_pending(current))
1393 goto out_free;
1394
1395 goto restart;
1396 }
1397
/* Deliver: queue the skb, notify the receiver, drop our reference. */
1398 skb_queue_tail(&other->sk_receive_queue, skb);
1399 unix_state_runlock(other);
1400 other->sk_data_ready(other, len);
1401 sock_put(other);
1402 scm_destroy(siocb->scm);
1403 return len;
1404
1405out_unlock:
1406 unix_state_runlock(other);
1407out_free:
1408 kfree_skb(skb);
1409out:
1410 if (other)
1411 sock_put(other);
1412 scm_destroy(siocb->scm);
1413 return err;
1414}
1415
1416
/*
 * Send data on a connected stream socket.
 *
 * The payload is cut into skbs, each queued directly on the peer's
 * receive queue.  Returns the number of bytes queued when at least one
 * byte was sent, otherwise a negative errno.  When the pipe is broken
 * before anything was sent and MSG_NOSIGNAL is not set, SIGPIPE is
 * raised on the current task.  The scm cookie is destroyed and
 * siocb->scm cleared on every path after scm_send() succeeded.
 */
1417static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1418 struct msghdr *msg, size_t len)
1419{
1420 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1421 struct sock *sk = sock->sk;
1422 struct sock *other = NULL;
1423 struct sockaddr_un *sunaddr=msg->msg_name;
1424 int err,size;
1425 struct sk_buff *skb;
1426 int sent=0;
1427 struct scm_cookie tmp_scm;
1428
/* Fall back to an on-stack cookie when the caller supplied none. */
1429 if (NULL == siocb->scm)
1430 siocb->scm = &tmp_scm;
1431 err = scm_send(sock, msg, siocb->scm);
1432 if (err < 0)
1433 return err;
1434
1435 err = -EOPNOTSUPP;
1436 if (msg->msg_flags&MSG_OOB)
1437 goto out_err;
1438
/* A destination address is never accepted here; without one the socket
 * must have a peer.  Note that no reference is taken on 'other'. */
1439 if (msg->msg_namelen) {
1440 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1441 goto out_err;
1442 } else {
1443 sunaddr = NULL;
1444 err = -ENOTCONN;
830a1e5c 1445 other = unix_peer(sk);
1da177e4
LT
1446 if (!other)
1447 goto out_err;
1448 }
1449
1450 if (sk->sk_shutdown & SEND_SHUTDOWN)
1451 goto pipe_err;
1452
1453 while(sent < len)
1454 {
1455 /*
e9df7d7f
BL
1456 * Optimisation for the fact that under 0.01% of X
1457 * messages typically need breaking up.
1da177e4
LT
1458 */
1459
e9df7d7f 1460 size = len-sent;
1da177e4
LT
1461
1462 /* Keep two messages in the pipe so it schedules better */
e9df7d7f
BL
1463 if (size > ((sk->sk_sndbuf >> 1) - 64))
1464 size = (sk->sk_sndbuf >> 1) - 64;
1da177e4
LT
1465
1466 if (size > SKB_MAX_ALLOC)
1467 size = SKB_MAX_ALLOC;
1468
1469 /*
1470 * Grab a buffer
1471 */
1472
1473 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1474
1475 if (skb==NULL)
1476 goto out_err;
1477
1478 /*
1479 * If you pass two values to the sock_alloc_send_skb
1480 * it tries to grab the large buffer with GFP_NOFS
1481 * (which can fail easily), and if it fails grab the
1482 * fallback size buffer which is under a page and will
1483 * succeed. [Alan]
1484 */
1485 size = min_t(int, size, skb_tailroom(skb));
1486
/* Credentials go on every skb; unix_attach_fds() clears scm->fp, so the
 * passed files travel with the first skb only. */
1487 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1488 if (siocb->scm->fp)
1489 unix_attach_fds(siocb->scm, skb);
1490
1491 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1492 kfree_skb(skb);
1493 goto out_err;
1494 }
1495
1496 unix_state_rlock(other);
1497
1498 if (sock_flag(other, SOCK_DEAD) ||
1499 (other->sk_shutdown & RCV_SHUTDOWN))
1500 goto pipe_err_free;
1501
1502 skb_queue_tail(&other->sk_receive_queue, skb);
1503 unix_state_runlock(other);
1504 other->sk_data_ready(other, size);
1505 sent+=size;
1506 }
1da177e4
LT
1507
1508 scm_destroy(siocb->scm);
1509 siocb->scm = NULL;
1510
1511 return sent;
1512
/* Error exits, from most to least state to undo. */
1513pipe_err_free:
1514 unix_state_runlock(other);
1515 kfree_skb(skb);
1516pipe_err:
1517 if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1518 send_sig(SIGPIPE,current,0);
1519 err = -EPIPE;
1520out_err:
1da177e4
LT
1521 scm_destroy(siocb->scm);
1522 siocb->scm = NULL;
/* A partial send reports the byte count rather than the error. */
1523 return sent ? : err;
1524}
1525
1526static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1527 struct msghdr *msg, size_t len)
1528{
1529 int err;
1530 struct sock *sk = sock->sk;
1531
1532 err = sock_error(sk);
1533 if (err)
1534 return err;
1535
1536 if (sk->sk_state != TCP_ESTABLISHED)
1537 return -ENOTCONN;
1538
1539 if (msg->msg_namelen)
1540 msg->msg_namelen = 0;
1541
1542 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1543}
1544
1545static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1546{
1547 struct unix_sock *u = unix_sk(sk);
1548
1549 msg->msg_namelen = 0;
1550 if (u->addr) {
1551 msg->msg_namelen = u->addr->len;
1552 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1553 }
1554}
1555
/*
 * Receive one datagram.
 *
 * Returns the number of bytes copied to the caller (at most 'size';
 * MSG_TRUNC is set in msg->msg_flags when the datagram was longer) or
 * a negative errno.  MSG_OOB is rejected with -EOPNOTSUPP.  The
 * sender's address, credentials and any passed files are reported
 * through 'msg' and the scm cookie.  u->readlock is held from before
 * the dequeue until the skb has been released.
 */
1556static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1557 struct msghdr *msg, size_t size,
1558 int flags)
1559{
1560 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1561 struct scm_cookie tmp_scm;
1562 struct sock *sk = sock->sk;
1563 struct unix_sock *u = unix_sk(sk);
1564 int noblock = flags & MSG_DONTWAIT;
1565 struct sk_buff *skb;
1566 int err;
1567
1568 err = -EOPNOTSUPP;
1569 if (flags&MSG_OOB)
1570 goto out;
1571
1572 msg->msg_namelen = 0;
1573
57b47a53 1574 mutex_lock(&u->readlock);
1da177e4
LT
1575
1576 skb = skb_recv_datagram(sk, flags, noblock, &err);
1577 if (!skb)
1578 goto out_unlock;
1579
/* Wake anyone sleeping on our peer_wait queue (presumably senders that
 * blocked because our receive queue was full - confirm against
 * unix_wait_for_peer()). */
1580 wake_up_interruptible(&u->peer_wait);
1581
/* skb->sk is the sending socket; report its bound address, if any. */
1582 if (msg->msg_name)
1583 unix_copy_addr(msg, skb->sk);
1584
1585 if (size > skb->len)
1586 size = skb->len;
1587 else if (size < skb->len)
1588 msg->msg_flags |= MSG_TRUNC;
1589
1590 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1591 if (err)
1592 goto out_free;
1593
/* Fall back to a zeroed on-stack cookie when the caller supplied none. */
1594 if (!siocb->scm) {
1595 siocb->scm = &tmp_scm;
1596 memset(&tmp_scm, 0, sizeof(tmp_scm));
1597 }
1598 siocb->scm->creds = *UNIXCREDS(skb);
877ce7c1 1599 unix_set_secdata(siocb->scm, skb);
1da177e4
LT
1600
/* A real read takes the passed files off the skb; a peek duplicates the
 * list and leaves the skb's own copy in place. */
1601 if (!(flags & MSG_PEEK))
1602 {
1603 if (UNIXCB(skb).fp)
1604 unix_detach_fds(siocb->scm, skb);
1605 }
1606 else
1607 {
1608 /* It is questionable: on PEEK we could:
1609 - do not return fds - good, but too simple 8)
1610 - return fds, and do not return them on read (old strategy,
1611 apparently wrong)
1612 - clone fds (I chose it for now, it is the most universal
1613 solution)
1614
1615 POSIX 1003.1g does not actually define this clearly
1616 at all. POSIX 1003.1g doesn't define a lot of things
1617 clearly however!
1618
1619 */
1620 if (UNIXCB(skb).fp)
1621 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1622 }
1623 err = size;
1624
1625 scm_recv(sock, msg, siocb->scm, flags);
1626
1627out_free:
1628 skb_free_datagram(sk,skb);
1629out_unlock:
57b47a53 1630 mutex_unlock(&u->readlock);
1da177e4
LT
1631out:
1632 return err;
1633}
1634
1635/*
1636 * Sleep until data has arrived. But check for races..
1637 */
1638
/*
 * Block the caller until one of the following holds: the receive queue
 * is non-empty, sk->sk_err is set, the receive side is shut down, a
 * signal is pending, or the timeout has run out.
 *
 * Returns the remaining timeout (the last value handed back by
 * schedule_timeout(), or 'timeo' unchanged when no sleep was needed).
 * The socket state read lock is held while the conditions are tested
 * and dropped across the sleep.
 */
1639static long unix_stream_data_wait(struct sock * sk, long timeo)
1640{
1641 DEFINE_WAIT(wait);
1642
1643 unix_state_rlock(sk);
1644
1645 for (;;) {
/* prepare_to_wait() runs before the conditions are tested; presumably so
 * that a wakeup arriving between the test and schedule_timeout() is not
 * missed. */
1646 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1647
b03efcfb 1648 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1da177e4
LT
1649 sk->sk_err ||
1650 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1651 signal_pending(current) ||
1652 !timeo)
1653 break;
1654
/* SOCK_ASYNC_WAITDATA is set only for the duration of the sleep. */
1655 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1656 unix_state_runlock(sk);
1657 timeo = schedule_timeout(timeo);
1658 unix_state_rlock(sk);
1659 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1660 }
1661
1662 finish_wait(sk->sk_sleep, &wait);
1663 unix_state_runlock(sk);
1664 return timeo;
1665}
1666
1667
1668
/*
 * Receive from a connected stream socket.
 *
 * Data is taken skb by skb from the receive queue until 'size' bytes
 * have been copied, or the queue is empty and at least 'target' bytes
 * (from sock_rcvlowat()) were copied, or a boundary is reached: an skb
 * from a sender with different credentials, an skb that carried passed
 * files, a partially consumed skb, or MSG_PEEK (which stops after the
 * first skb).
 *
 * Returns the number of bytes copied when that is non-zero (-EFAULT is
 * stored there when the very first copy faults), otherwise 'err': a
 * negative errno, or 0 when the receive side is shut down with nothing
 * queued.  Requires TCP_ESTABLISHED (-EINVAL) and rejects MSG_OOB
 * (-EOPNOTSUPP).
 */
1669static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1670 struct msghdr *msg, size_t size,
1671 int flags)
1672{
1673 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1674 struct scm_cookie tmp_scm;
1675 struct sock *sk = sock->sk;
1676 struct unix_sock *u = unix_sk(sk);
1677 struct sockaddr_un *sunaddr=msg->msg_name;
1678 int copied = 0;
1679 int check_creds = 0;
1680 int target;
1681 int err = 0;
1682 long timeo;
1683
1684 err = -EINVAL;
1685 if (sk->sk_state != TCP_ESTABLISHED)
1686 goto out;
1687
1688 err = -EOPNOTSUPP;
1689 if (flags&MSG_OOB)
1690 goto out;
1691
1692 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1693 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1694
1695 msg->msg_namelen = 0;
1696
1697 /* Lock the socket to prevent queue disordering
1698 * while sleeps in memcpy_tomsg
1699 */
1700
1701 if (!siocb->scm) {
1702 siocb->scm = &tmp_scm;
1703 memset(&tmp_scm, 0, sizeof(tmp_scm));
1704 }
1705
57b47a53 1706 mutex_lock(&u->readlock);
1da177e4
LT
1707
1708 do
1709 {
1710 int chunk;
1711 struct sk_buff *skb;
1712
1713 skb = skb_dequeue(&sk->sk_receive_queue);
/* Queue empty: finish if enough was copied, otherwise report an error or
 * shutdown, or sleep for more data. */
1714 if (skb==NULL)
1715 {
1716 if (copied >= target)
1717 break;
1718
1719 /*
1720 * POSIX 1003.1g mandates this order.
1721 */
1722
1723 if ((err = sock_error(sk)) != 0)
1724 break;
1725 if (sk->sk_shutdown & RCV_SHUTDOWN)
1726 break;
1727 err = -EAGAIN;
1728 if (!timeo)
1729 break;
/* readlock is released across the sleep; the signal exit below leaves
 * with it released and skips both the final unlock and scm_recv(). */
57b47a53 1730 mutex_unlock(&u->readlock);
1da177e4
LT
1731
1732 timeo = unix_stream_data_wait(sk, timeo);
1733
1734 if (signal_pending(current)) {
1735 err = sock_intr_errno(timeo);
1736 goto out;
1737 }
57b47a53 1738 mutex_lock(&u->readlock);
1da177e4
LT
1739 continue;
1740 }
1741
/* The first skb fixes the credentials reported to the caller; a later skb
 * with different credentials is pushed back and ends the read. */
1742 if (check_creds) {
1743 /* Never glue messages from different writers */
1744 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1745 skb_queue_head(&sk->sk_receive_queue, skb);
1746 break;
1747 }
1748 } else {
1749 /* Copy credentials */
1750 siocb->scm->creds = *UNIXCREDS(skb);
1751 check_creds = 1;
1752 }
1753
1754 /* Copy address just once */
1755 if (sunaddr)
1756 {
1757 unix_copy_addr(msg, skb->sk);
1758 sunaddr = NULL;
1759 }
1760
1761 chunk = min_t(unsigned int, skb->len, size);
/* On a faulting copy the skb goes back untouched; -EFAULT is reported only
 * when nothing had been copied before. */
1762 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1763 skb_queue_head(&sk->sk_receive_queue, skb);
1764 if (copied == 0)
1765 copied = -EFAULT;
1766 break;
1767 }
1768 copied += chunk;
1769 size -= chunk;
1770
1771 /* Mark read part of skb as used */
1772 if (!(flags & MSG_PEEK))
1773 {
1774 skb_pull(skb, chunk);
1775
1776 if (UNIXCB(skb).fp)
1777 unix_detach_fds(siocb->scm, skb);
1778
1779 /* put the skb back if we didn't use it up.. */
1780 if (skb->len)
1781 {
1782 skb_queue_head(&sk->sk_receive_queue, skb);
1783 break;
1784 }
1785
1786 kfree_skb(skb);
1787
/* Stop once passed files have been collected into the cookie. */
1788 if (siocb->scm->fp)
1789 break;
1790 }
1791 else
1792 {
1793 /* It is questionable, see note in unix_dgram_recvmsg.
1794 */
1795 if (UNIXCB(skb).fp)
1796 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1797
1798 /* put message back and return */
1799 skb_queue_head(&sk->sk_receive_queue, skb);
1800 break;
1801 }
1802 } while (size);
1803
57b47a53 1804 mutex_unlock(&u->readlock);
1da177e4
LT
1805 scm_recv(sock, msg, siocb->scm, flags);
1806out:
1807 return copied ? : err;
1808}
1809
1810static int unix_shutdown(struct socket *sock, int mode)
1811{
1812 struct sock *sk = sock->sk;
1813 struct sock *other;
1814
1815 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1816
1817 if (mode) {
1818 unix_state_wlock(sk);
1819 sk->sk_shutdown |= mode;
1820 other=unix_peer(sk);
1821 if (other)
1822 sock_hold(other);
1823 unix_state_wunlock(sk);
1824 sk->sk_state_change(sk);
1825
1826 if (other &&
1827 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1828
1829 int peer_mode = 0;
1830
1831 if (mode&RCV_SHUTDOWN)
1832 peer_mode |= SEND_SHUTDOWN;
1833 if (mode&SEND_SHUTDOWN)
1834 peer_mode |= RCV_SHUTDOWN;
1835 unix_state_wlock(other);
1836 other->sk_shutdown |= peer_mode;
1837 unix_state_wunlock(other);
1838 other->sk_state_change(other);
1839 read_lock(&other->sk_callback_lock);
1840 if (peer_mode == SHUTDOWN_MASK)
1841 sk_wake_async(other,1,POLL_HUP);
1842 else if (peer_mode & RCV_SHUTDOWN)
1843 sk_wake_async(other,1,POLL_IN);
1844 read_unlock(&other->sk_callback_lock);
1845 }
1846 if (other)
1847 sock_put(other);
1848 }
1849 return 0;
1850}
1851
1852static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1853{
1854 struct sock *sk = sock->sk;
1855 long amount=0;
1856 int err;
1857
1858 switch(cmd)
1859 {
1860 case SIOCOUTQ:
1861 amount = atomic_read(&sk->sk_wmem_alloc);
1862 err = put_user(amount, (int __user *)arg);
1863 break;
1864 case SIOCINQ:
1865 {
1866 struct sk_buff *skb;
1867
1868 if (sk->sk_state == TCP_LISTEN) {
1869 err = -EINVAL;
1870 break;
1871 }
1872
1873 spin_lock(&sk->sk_receive_queue.lock);
1874 if (sk->sk_type == SOCK_STREAM ||
1875 sk->sk_type == SOCK_SEQPACKET) {
1876 skb_queue_walk(&sk->sk_receive_queue, skb)
1877 amount += skb->len;
1878 } else {
1879 skb = skb_peek(&sk->sk_receive_queue);
1880 if (skb)
1881 amount=skb->len;
1882 }
1883 spin_unlock(&sk->sk_receive_queue.lock);
1884 err = put_user(amount, (int __user *)arg);
1885 break;
1886 }
1887
1888 default:
b5e5fa5e 1889 err = -ENOIOCTLCMD;
1da177e4
LT
1890 break;
1891 }
1892 return err;
1893}
1894
1895static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1896{
1897 struct sock *sk = sock->sk;
1898 unsigned int mask;
1899
1900 poll_wait(file, sk->sk_sleep, wait);
1901 mask = 0;
1902
1903 /* exceptional events? */
1904 if (sk->sk_err)
1905 mask |= POLLERR;
1906 if (sk->sk_shutdown == SHUTDOWN_MASK)
1907 mask |= POLLHUP;
f348d70a
DL
1908 if (sk->sk_shutdown & RCV_SHUTDOWN)
1909 mask |= POLLRDHUP;
1da177e4
LT
1910
1911 /* readable? */
1912 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1913 (sk->sk_shutdown & RCV_SHUTDOWN))
1914 mask |= POLLIN | POLLRDNORM;
1915
1916 /* Connection-based need to check for termination and startup */
1917 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1918 mask |= POLLHUP;
1919
1920 /*
1921 * we set writable also when the other side has shut down the
1922 * connection. This prevents stuck sockets.
1923 */
1924 if (unix_writable(sk))
1925 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1926
1927 return mask;
1928}
1929
1930
1931#ifdef CONFIG_PROC_FS
1932static struct sock *unix_seq_idx(int *iter, loff_t pos)
1933{
1934 loff_t off = 0;
1935 struct sock *s;
1936
1937 for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1938 if (off == pos)
1939 return s;
1940 ++off;
1941 }
1942 return NULL;
1943}
1944
1945
1946static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1947{
fbe9cc4a 1948 spin_lock(&unix_table_lock);
1da177e4
LT
1949 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1950}
1951
1952static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1953{
1954 ++*pos;
1955
1956 if (v == (void *)1)
1957 return first_unix_socket(seq->private);
1958 return next_unix_socket(seq->private, v);
1959}
1960
1961static void unix_seq_stop(struct seq_file *seq, void *v)
1962{
fbe9cc4a 1963 spin_unlock(&unix_table_lock);
1da177e4
LT
1964}
1965
/*
 * seq_file ->show: emit one line of output.
 *
 * For the start token (void *)1 this is the column header; otherwise 'v'
 * is a socket and one row describing it is printed while its state read
 * lock is held.  Always returns 0.
 */
1966static int unix_seq_show(struct seq_file *seq, void *v)
1967{
1968
1969 if (v == (void *)1)
1970 seq_puts(seq, "Num RefCount Protocol Flags Type St "
1971 "Inode Path\n");
1972 else {
1973 struct sock *s = v;
1974 struct unix_sock *u = unix_sk(s);
1975 unix_state_rlock(s);
1976
/* Columns: sock pointer, refcount, constant 0, __SO_ACCEPTCON when
 * listening, socket type, an SS_* state derived from sk_state and from
 * whether a struct socket is attached, inode number. */
1977 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1978 s,
1979 atomic_read(&s->sk_refcnt),
1980 0,
1981 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1982 s->sk_type,
1983 s->sk_socket ?
1984 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1985 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1986 sock_i_ino(s));
1987
/* Bound sockets additionally get their path printed. */
1988 if (u->addr) {
1989 int i, len;
1990 seq_putc(seq, ' ');
1991
1992 i = 0;
/* addr->len minus sizeof(short) is the number of sun_path bytes.  A
 * non-abstract name prints one byte fewer (presumably the trailing NUL -
 * confirm against unix_mkname()); an abstract name prints '@' in place of
 * its first byte. */
1993 len = u->addr->len - sizeof(short);
1994 if (!UNIX_ABSTRACT(s))
1995 len--;
1996 else {
1997 seq_putc(seq, '@');
1998 i++;
1999 }
2000 for ( ; i < len; i++)
2001 seq_putc(seq, u->addr->name->sun_path[i]);
2002 }
2003 unix_state_runlock(s);
2004 seq_putc(seq, '\n');
2005 }
2006
2007 return 0;
2008}
2009
/* seq_file iterator callbacks; handed to seq_open() by unix_seq_open(). */
2010static struct seq_operations unix_seq_ops = {
2011 .start = unix_seq_start,
2012 .next = unix_seq_next,
2013 .stop = unix_seq_stop,
2014 .show = unix_seq_show,
2015};
2016
2017
2018static int unix_seq_open(struct inode *inode, struct file *file)
2019{
2020 struct seq_file *seq;
2021 int rc = -ENOMEM;
2022 int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2023
2024 if (!iter)
2025 goto out;
2026
2027 rc = seq_open(file, &unix_seq_ops);
2028 if (rc)
2029 goto out_kfree;
2030
2031 seq = file->private_data;
2032 seq->private = iter;
2033 *iter = 0;
2034out:
2035 return rc;
2036out_kfree:
2037 kfree(iter);
2038 goto out;
2039}
2040
/* File operations for the "unix" proc-net entry created in af_unix_init(). */
2041static struct file_operations unix_seq_fops = {
2042 .owner = THIS_MODULE,
2043 .open = unix_seq_open,
2044 .read = seq_read,
2045 .llseek = seq_lseek,
2046 .release = seq_release_private,
2047};
2048
2049#endif
2050
/* PF_UNIX protocol family descriptor; passed to sock_register() in af_unix_init(). */
2051static struct net_proto_family unix_family_ops = {
2052 .family = PF_UNIX,
2053 .create = unix_create,
2054 .owner = THIS_MODULE,
2055};
2056
1da177e4
LT
2057static int __init af_unix_init(void)
2058{
2059 int rc = -1;
2060 struct sk_buff *dummy_skb;
2061
2062 if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2063 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2064 goto out;
2065 }
2066
2067 rc = proto_register(&unix_proto, 1);
2068 if (rc != 0) {
2069 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2070 __FUNCTION__);
2071 goto out;
2072 }
2073
2074 sock_register(&unix_family_ops);
2075#ifdef CONFIG_PROC_FS
2076 proc_net_fops_create("unix", 0, &unix_seq_fops);
2077#endif
2078 unix_sysctl_register();
2079out:
2080 return rc;
2081}
2082
2083static void __exit af_unix_exit(void)
2084{
2085 sock_unregister(PF_UNIX);
2086 unix_sysctl_unregister();
2087 proc_net_remove("unix");
2088 proto_unregister(&unix_proto);
2089}
2090
2091module_init(af_unix_init);
2092module_exit(af_unix_exit);
2093
2094MODULE_LICENSE("GPL");
2095MODULE_ALIAS_NETPROTO(PF_UNIX);