/*
 * NET4:    Implementation of BSD Unix domain sockets.
 *
 * Authors: Alan Cox, <alan.cox@linux.org>
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
 *
 * Fixes:
 *      Linus Torvalds      : Assorted bug cures.
 *      Niibe Yutaka        : async I/O support.
 *      Carsten Paeth       : PF_UNIX check, address fixes.
 *      Alan Cox            : Limit size of allocated blocks.
 *      Alan Cox            : Fixed the stupid socketpair bug.
 *      Alan Cox            : BSD compatibility fine tuning.
 *      Alan Cox            : Fixed a bug in connect when interrupted.
 *      Alan Cox            : Sorted out a proper draft version of
 *                            file descriptor passing hacked up from
 *                            Mike Shaver's work.
 *      Marty Leisner       : Fixes to fd passing.
 *      Nick Nevin          : recvmsg bugfix.
 *      Alan Cox            : Started proper garbage collector.
 *      Heiko EiBfeldt      : Missing verify_area check.
 *      Alan Cox            : Started POSIXisms.
 *      Andreas Schwab      : Replace inode by dentry for proper
 *                            reference counting.
 *      Kirk Petersen       : Made this a module.
 *      Christoph Rohland   : Elegant non-blocking accept/connect algorithm.
 *                            Lots of bug fixes.
 *      Alexey Kuznetsov    : Repaired (I hope) bugs introduced
 *                            by the above two patches.
 *      Andrea Arcangeli    : If possible we block in connect(2)
 *                            if the max backlog of the listen socket
 *                            has been reached. This won't break
 *                            old apps and it will avoid a huge amount
 *                            of socks hashed (this for unix_gc()
 *                            performance reasons).
 *                            Security fix that limits the max
 *                            number of socks to 2*max_files and
 *                            the number of skbs queueable in the
 *                            dgram receiver.
 *      Artur Skawina       : Hash function optimizations.
 *      Alexey Kuznetsov    : Full scale SMP. Lots of bugs are introduced 8)
 *      Malcolm Beattie     : Set peercred for socketpair.
 *      Michal Ostrowski    : Module initialization cleanup.
 *      Arnaldo C. Melo     : Remove MOD_{INC,DEC}_USE_COUNT,
 *                            the core infrastructure is doing that
 *                            for all net proto families now (2.5.69+)
 *
 *
 * Known differences from reference BSD that was tested:
 *
 *  [TO FIX]
 *  ECONNREFUSED is not returned from one end of a connected() socket to the
 *      other the moment one end closes.
 *  fstat() doesn't return st_dev=0, and give the blksize as high water mark
 *      and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *  [NOT TO FIX]
 *  accept() returns a path name even if the connecting socket has closed
 *      in the meantime (BSD loses the path and gives up).
 *  accept() returns 0 length path for an unbound connector. BSD returns 16
 *      and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *  socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *  BSD af_unix apparently has connect forgetting to block properly.
 *      (need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *  Bug fixes and improvements.
 *      - client shutdown killed server socket.
 *      - removed all useless cli/sti pairs.
 *
 *  Semantic changes/extensions.
 *      - generic control message passing.
 *      - SCM_CREDENTIALS control message.
 *      - "Abstract" (not FS based) socket bindings.
 *        Abstract names are sequences of bytes (not zero terminated)
 *        started by 0, so that this name space does not intersect
 *        with BSD names.
 */
84
85#include <linux/module.h>
1da177e4 86#include <linux/kernel.h>
87#include <linux/signal.h>
88#include <linux/sched.h>
89#include <linux/errno.h>
90#include <linux/string.h>
91#include <linux/stat.h>
92#include <linux/dcache.h>
93#include <linux/namei.h>
94#include <linux/socket.h>
95#include <linux/un.h>
96#include <linux/fcntl.h>
97#include <linux/termios.h>
98#include <linux/sockios.h>
99#include <linux/net.h>
100#include <linux/in.h>
101#include <linux/fs.h>
102#include <linux/slab.h>
103#include <asm/uaccess.h>
104#include <linux/skbuff.h>
105#include <linux/netdevice.h>
106#include <net/sock.h>
c752f073 107#include <net/tcp_states.h>
108#include <net/af_unix.h>
109#include <linux/proc_fs.h>
110#include <linux/seq_file.h>
111#include <net/scm.h>
112#include <linux/init.h>
113#include <linux/poll.h>
114#include <linux/smp_lock.h>
115#include <linux/rtnetlink.h>
116#include <linux/mount.h>
117#include <net/checksum.h>
118#include <linux/security.h>
119
120int sysctl_unix_max_dgram_qlen = 10;
121
122struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
fbe9cc4a 123DEFINE_SPINLOCK(unix_table_lock);
124static atomic_t unix_nr_socks = ATOMIC_INIT(0);
125
126#define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
127
128#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
129
877ce7c1 130#ifdef CONFIG_SECURITY_NETWORK
dc49c1f9 131static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
877ce7c1 132{
dc49c1f9 133 memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
134}
135
136static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
137{
dc49c1f9 138 scm->secid = *UNIXSID(skb);
139}
140#else
dc49c1f9 141static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
142{ }
143
144static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145{ }
146#endif /* CONFIG_SECURITY_NETWORK */
147
148/*
149 * SMP locking strategy:
fbe9cc4a 150 * hash table is protected with spinlock unix_table_lock
151 * each socket state is protected by separate rwlock.
152 */
153
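/*
 * Fold a 32-bit name checksum down to a hash table slot: XOR the high
 * half into the low half, fold once more by a byte, and mask with
 * UNIX_HASH_SIZE-1.
 */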
154static inline unsigned unix_hash_fold(unsigned hash)
155{
156 hash ^= hash>>16;
157 hash ^= hash>>8;
158 return hash&(UNIX_HASH_SIZE-1);
159}
160
161#define unix_peer(sk) (unix_sk(sk)->peer)
162
163static inline int unix_our_peer(struct sock *sk, struct sock *osk)
164{
165 return unix_peer(osk) == sk;
166}
167
168static inline int unix_may_send(struct sock *sk, struct sock *osk)
169{
170 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
171}
172
173static struct sock *unix_peer_get(struct sock *s)
174{
175 struct sock *peer;
176
177 unix_state_rlock(s);
178 peer = unix_peer(s);
179 if (peer)
180 sock_hold(peer);
181 unix_state_runlock(s);
182 return peer;
183}
184
185static inline void unix_release_addr(struct unix_address *addr)
186{
187 if (atomic_dec_and_test(&addr->refcnt))
188 kfree(addr);
189}
190
/*
 * Check unix socket name:
 *  - should not be zero length.
 *  - if it does not start with a zero byte, it should be NUL terminated (FS object)
 *  - if it starts with a zero byte, it is an abstract name.
 */
197
198static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
199{
200 if (len <= sizeof(short) || len > sizeof(*sunaddr))
201 return -EINVAL;
202 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
203 return -EINVAL;
204 if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off-by-one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However, in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
212 ((char *)sunaddr)[len]=0;
213 len = strlen(sunaddr->sun_path)+1+sizeof(short);
214 return len;
215 }
216
217 *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
218 return len;
219}
220
221static void __unix_remove_socket(struct sock *sk)
222{
223 sk_del_node_init(sk);
224}
225
226static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
227{
228 BUG_TRAP(sk_unhashed(sk));
229 sk_add_node(sk, list);
230}
231
232static inline void unix_remove_socket(struct sock *sk)
233{
fbe9cc4a 234 spin_lock(&unix_table_lock);
1da177e4 235 __unix_remove_socket(sk);
fbe9cc4a 236 spin_unlock(&unix_table_lock);
237}
238
239static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
240{
fbe9cc4a 241 spin_lock(&unix_table_lock);
1da177e4 242 __unix_insert_socket(list, sk);
fbe9cc4a 243 spin_unlock(&unix_table_lock);
244}
245
246static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
247 int len, int type, unsigned hash)
248{
249 struct sock *s;
250 struct hlist_node *node;
251
252 sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
253 struct unix_sock *u = unix_sk(s);
254
255 if (u->addr->len == len &&
256 !memcmp(u->addr->name, sunname, len))
257 goto found;
258 }
259 s = NULL;
260found:
261 return s;
262}
263
264static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
265 int len, int type,
266 unsigned hash)
267{
268 struct sock *s;
269
fbe9cc4a 270 spin_lock(&unix_table_lock);
271 s = __unix_find_socket_byname(sunname, len, type, hash);
272 if (s)
273 sock_hold(s);
fbe9cc4a 274 spin_unlock(&unix_table_lock);
275 return s;
276}
277
278static struct sock *unix_find_socket_byinode(struct inode *i)
279{
280 struct sock *s;
281 struct hlist_node *node;
282
fbe9cc4a 283 spin_lock(&unix_table_lock);
284 sk_for_each(s, node,
285 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
286 struct dentry *dentry = unix_sk(s)->dentry;
287
288 if(dentry && dentry->d_inode == i)
289 {
290 sock_hold(s);
291 goto found;
292 }
293 }
294 s = NULL;
295found:
fbe9cc4a 296 spin_unlock(&unix_table_lock);
297 return s;
298}
299
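/*
 * A unix socket counts as writable while no more than a quarter of its
 * send buffer is consumed by queued write memory.
 */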
300static inline int unix_writable(struct sock *sk)
301{
302 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
303}
304
305static void unix_write_space(struct sock *sk)
306{
307 read_lock(&sk->sk_callback_lock);
308 if (unix_writable(sk)) {
309 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
310 wake_up_interruptible(sk->sk_sleep);
311 sk_wake_async(sk, 2, POLL_OUT);
312 }
313 read_unlock(&sk->sk_callback_lock);
314}
315
/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets that arrived from the previous peer. First, this allows us
 * to do flow control based only on wmem_alloc; second, a sk connected to a
 * peer may receive messages only from that peer. */
320static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
321{
b03efcfb 322 if (!skb_queue_empty(&sk->sk_receive_queue)) {
323 skb_queue_purge(&sk->sk_receive_queue);
324 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
325
		/* If one link of a bidirectional dgram pipe is disconnected,
		 * we signal an error. Messages are lost. Do not do this
		 * when the peer was not connected to us.
		 */
330 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
331 other->sk_err = ECONNRESET;
332 other->sk_error_report(other);
333 }
334 }
335}
336
337static void unix_sock_destructor(struct sock *sk)
338{
339 struct unix_sock *u = unix_sk(sk);
340
341 skb_queue_purge(&sk->sk_receive_queue);
342
343 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
344 BUG_TRAP(sk_unhashed(sk));
345 BUG_TRAP(!sk->sk_socket);
346 if (!sock_flag(sk, SOCK_DEAD)) {
347 printk("Attempt to release alive unix socket: %p\n", sk);
348 return;
349 }
350
351 if (u->addr)
352 unix_release_addr(u->addr);
353
354 atomic_dec(&unix_nr_socks);
355#ifdef UNIX_REFCNT_DEBUG
356 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
357#endif
358}
359
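/*
 * Tear a socket down: unhash it, orphan it, notify the peer, flush the
 * receive queue and drop the final reference. Called from unix_release()
 * and, with embrion set, for unaccepted embryos queued on a dying
 * listener.
 */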
360static int unix_release_sock (struct sock *sk, int embrion)
361{
362 struct unix_sock *u = unix_sk(sk);
363 struct dentry *dentry;
364 struct vfsmount *mnt;
365 struct sock *skpair;
366 struct sk_buff *skb;
367 int state;
368
369 unix_remove_socket(sk);
370
371 /* Clear state */
372 unix_state_wlock(sk);
373 sock_orphan(sk);
374 sk->sk_shutdown = SHUTDOWN_MASK;
375 dentry = u->dentry;
376 u->dentry = NULL;
377 mnt = u->mnt;
378 u->mnt = NULL;
379 state = sk->sk_state;
380 sk->sk_state = TCP_CLOSE;
381 unix_state_wunlock(sk);
382
383 wake_up_interruptible_all(&u->peer_wait);
384
385 skpair=unix_peer(sk);
386
387 if (skpair!=NULL) {
388 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
389 unix_state_wlock(skpair);
390 /* No more writes */
391 skpair->sk_shutdown = SHUTDOWN_MASK;
392 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
393 skpair->sk_err = ECONNRESET;
394 unix_state_wunlock(skpair);
395 skpair->sk_state_change(skpair);
396 read_lock(&skpair->sk_callback_lock);
397 sk_wake_async(skpair,1,POLL_HUP);
398 read_unlock(&skpair->sk_callback_lock);
399 }
400 sock_put(skpair); /* It may now die */
401 unix_peer(sk) = NULL;
402 }
403
404 /* Try to flush out this socket. Throw out buffers at least */
405
406 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
407 if (state==TCP_LISTEN)
408 unix_release_sock(skb->sk, 1);
409 /* passed fds are erased in the kfree_skb hook */
410 kfree_skb(skb);
411 }
412
413 if (dentry) {
414 dput(dentry);
415 mntput(mnt);
416 }
417
418 sock_put(sk);
419
420 /* ---- Socket is dead now and most probably destroyed ---- */
421
	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *        ECONNRESET and we die on the spot. In Linux we behave
	 *        like files and pipes do and wait for the last
	 *        dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 * What is the above comment talking about? --ANK(980817)
	 */
432
433 if (atomic_read(&unix_tot_inflight))
434 unix_gc(); /* Garbage collect fds */
435
436 return 0;
437}
438
439static int unix_listen(struct socket *sock, int backlog)
440{
441 int err;
442 struct sock *sk = sock->sk;
443 struct unix_sock *u = unix_sk(sk);
444
445 err = -EOPNOTSUPP;
446 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
447 goto out; /* Only stream/seqpacket sockets accept */
448 err = -EINVAL;
449 if (!u->addr)
450 goto out; /* No listens on an unbound socket */
451 unix_state_wlock(sk);
452 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
453 goto out_unlock;
454 if (backlog > sk->sk_max_ack_backlog)
455 wake_up_interruptible_all(&u->peer_wait);
456 sk->sk_max_ack_backlog = backlog;
457 sk->sk_state = TCP_LISTEN;
458 /* set credentials so connect can copy them */
459 sk->sk_peercred.pid = current->tgid;
460 sk->sk_peercred.uid = current->euid;
461 sk->sk_peercred.gid = current->egid;
462 err = 0;
463
464out_unlock:
465 unix_state_wunlock(sk);
466out:
467 return err;
468}
469
470static int unix_release(struct socket *);
471static int unix_bind(struct socket *, struct sockaddr *, int);
472static int unix_stream_connect(struct socket *, struct sockaddr *,
473 int addr_len, int flags);
474static int unix_socketpair(struct socket *, struct socket *);
475static int unix_accept(struct socket *, struct socket *, int);
476static int unix_getname(struct socket *, struct sockaddr *, int *, int);
477static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
478static int unix_ioctl(struct socket *, unsigned int, unsigned long);
479static int unix_shutdown(struct socket *, int);
480static int unix_stream_sendmsg(struct kiocb *, struct socket *,
481 struct msghdr *, size_t);
482static int unix_stream_recvmsg(struct kiocb *, struct socket *,
483 struct msghdr *, size_t, int);
484static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
485 struct msghdr *, size_t);
486static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
487 struct msghdr *, size_t, int);
488static int unix_dgram_connect(struct socket *, struct sockaddr *,
489 int, int);
490static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
491 struct msghdr *, size_t);
492
90ddc4f0 493static const struct proto_ops unix_stream_ops = {
494 .family = PF_UNIX,
495 .owner = THIS_MODULE,
496 .release = unix_release,
497 .bind = unix_bind,
498 .connect = unix_stream_connect,
499 .socketpair = unix_socketpair,
500 .accept = unix_accept,
501 .getname = unix_getname,
502 .poll = unix_poll,
503 .ioctl = unix_ioctl,
504 .listen = unix_listen,
505 .shutdown = unix_shutdown,
506 .setsockopt = sock_no_setsockopt,
507 .getsockopt = sock_no_getsockopt,
508 .sendmsg = unix_stream_sendmsg,
509 .recvmsg = unix_stream_recvmsg,
510 .mmap = sock_no_mmap,
511 .sendpage = sock_no_sendpage,
512};
513
90ddc4f0 514static const struct proto_ops unix_dgram_ops = {
515 .family = PF_UNIX,
516 .owner = THIS_MODULE,
517 .release = unix_release,
518 .bind = unix_bind,
519 .connect = unix_dgram_connect,
520 .socketpair = unix_socketpair,
521 .accept = sock_no_accept,
522 .getname = unix_getname,
523 .poll = datagram_poll,
524 .ioctl = unix_ioctl,
525 .listen = sock_no_listen,
526 .shutdown = unix_shutdown,
527 .setsockopt = sock_no_setsockopt,
528 .getsockopt = sock_no_getsockopt,
529 .sendmsg = unix_dgram_sendmsg,
530 .recvmsg = unix_dgram_recvmsg,
531 .mmap = sock_no_mmap,
532 .sendpage = sock_no_sendpage,
533};
534
90ddc4f0 535static const struct proto_ops unix_seqpacket_ops = {
536 .family = PF_UNIX,
537 .owner = THIS_MODULE,
538 .release = unix_release,
539 .bind = unix_bind,
540 .connect = unix_stream_connect,
541 .socketpair = unix_socketpair,
542 .accept = unix_accept,
543 .getname = unix_getname,
544 .poll = datagram_poll,
545 .ioctl = unix_ioctl,
546 .listen = unix_listen,
547 .shutdown = unix_shutdown,
548 .setsockopt = sock_no_setsockopt,
549 .getsockopt = sock_no_getsockopt,
550 .sendmsg = unix_seqpacket_sendmsg,
551 .recvmsg = unix_dgram_recvmsg,
552 .mmap = sock_no_mmap,
553 .sendpage = sock_no_sendpage,
554};
555
556static struct proto unix_proto = {
557 .name = "UNIX",
558 .owner = THIS_MODULE,
559 .obj_size = sizeof(struct unix_sock),
560};
561
562/*
563 * AF_UNIX sockets do not interact with hardware, hence they
564 * dont trigger interrupts - so it's safe for them to have
565 * bh-unsafe locking for their sk_receive_queue.lock. Split off
566 * this special lock-class by reinitializing the spinlock key:
567 */
568static struct lock_class_key af_unix_sk_receive_queue_lock_key;
569
570static struct sock * unix_create1(struct socket *sock)
571{
572 struct sock *sk = NULL;
573 struct unix_sock *u;
574
529bf6be 575 if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
576 goto out;
577
578 sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
579 if (!sk)
580 goto out;
581
582 atomic_inc(&unix_nr_socks);
583
584 sock_init_data(sock,sk);
585 lockdep_set_class(&sk->sk_receive_queue.lock,
586 &af_unix_sk_receive_queue_lock_key);
587
588 sk->sk_write_space = unix_write_space;
589 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
590 sk->sk_destruct = unix_sock_destructor;
591 u = unix_sk(sk);
592 u->dentry = NULL;
593 u->mnt = NULL;
fd19f329 594 spin_lock_init(&u->lock);
1da177e4 595 atomic_set(&u->inflight, sock ? 0 : -1);
57b47a53 596 mutex_init(&u->readlock); /* single task reading lock */
597 init_waitqueue_head(&u->peer_wait);
598 unix_insert_socket(unix_sockets_unbound, sk);
599out:
600 return sk;
601}
602
603static int unix_create(struct socket *sock, int protocol)
604{
605 if (protocol && protocol != PF_UNIX)
606 return -EPROTONOSUPPORT;
607
608 sock->state = SS_UNCONNECTED;
609
610 switch (sock->type) {
611 case SOCK_STREAM:
612 sock->ops = &unix_stream_ops;
613 break;
614 /*
615 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
616 * nothing uses it.
617 */
618 case SOCK_RAW:
619 sock->type=SOCK_DGRAM;
620 case SOCK_DGRAM:
621 sock->ops = &unix_dgram_ops;
622 break;
623 case SOCK_SEQPACKET:
624 sock->ops = &unix_seqpacket_ops;
625 break;
626 default:
627 return -ESOCKTNOSUPPORT;
628 }
629
630 return unix_create1(sock) ? 0 : -ENOMEM;
631}
632
633static int unix_release(struct socket *sock)
634{
635 struct sock *sk = sock->sk;
636
637 if (!sk)
638 return 0;
639
640 sock->sk = NULL;
641
642 return unix_release_sock (sk, 0);
643}
644
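/*
 * Autobind: give an unbound socket an unused abstract name of the form
 * "\0XXXXX" (five hex digits). Triggered when bind() is called with only
 * sun_family filled in (addr_len == sizeof(short)), or when a
 * SOCK_PASSCRED socket connects or sends while still unbound.
 */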
645static int unix_autobind(struct socket *sock)
646{
647 struct sock *sk = sock->sk;
648 struct unix_sock *u = unix_sk(sk);
649 static u32 ordernum = 1;
650 struct unix_address * addr;
651 int err;
652
57b47a53 653 mutex_lock(&u->readlock);
654
655 err = 0;
656 if (u->addr)
657 goto out;
658
659 err = -ENOMEM;
0da974f4 660 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
661 if (!addr)
662 goto out;
663
664 addr->name->sun_family = AF_UNIX;
665 atomic_set(&addr->refcnt, 1);
666
667retry:
668 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
669 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
670
fbe9cc4a 671 spin_lock(&unix_table_lock);
672 ordernum = (ordernum+1)&0xFFFFF;
673
674 if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
675 addr->hash)) {
fbe9cc4a 676 spin_unlock(&unix_table_lock);
677 /* Sanity yield. It is unusual case, but yet... */
678 if (!(ordernum&0xFF))
679 yield();
680 goto retry;
681 }
682 addr->hash ^= sk->sk_type;
683
684 __unix_remove_socket(sk);
685 u->addr = addr;
686 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
fbe9cc4a 687 spin_unlock(&unix_table_lock);
688 err = 0;
689
57b47a53 690out: mutex_unlock(&u->readlock);
691 return err;
692}
693
694static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
695 int type, unsigned hash, int *error)
696{
697 struct sock *u;
698 struct nameidata nd;
699 int err = 0;
700
701 if (sunname->sun_path[0]) {
702 err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
703 if (err)
704 goto fail;
e4543edd 705 err = vfs_permission(&nd, MAY_WRITE);
706 if (err)
707 goto put_fail;
708
709 err = -ECONNREFUSED;
710 if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
711 goto put_fail;
712 u=unix_find_socket_byinode(nd.dentry->d_inode);
713 if (!u)
714 goto put_fail;
715
716 if (u->sk_type == type)
717 touch_atime(nd.mnt, nd.dentry);
718
719 path_release(&nd);
720
721 err=-EPROTOTYPE;
722 if (u->sk_type != type) {
723 sock_put(u);
724 goto fail;
725 }
726 } else {
727 err = -ECONNREFUSED;
728 u=unix_find_socket_byname(sunname, len, type, hash);
729 if (u) {
730 struct dentry *dentry;
731 dentry = unix_sk(u)->dentry;
732 if (dentry)
733 touch_atime(unix_sk(u)->mnt, dentry);
734 } else
735 goto fail;
736 }
737 return u;
738
739put_fail:
740 path_release(&nd);
741fail:
742 *error=err;
743 return NULL;
744}
745
746
747static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
748{
749 struct sock *sk = sock->sk;
750 struct unix_sock *u = unix_sk(sk);
751 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
752 struct dentry * dentry = NULL;
753 struct nameidata nd;
754 int err;
755 unsigned hash;
756 struct unix_address *addr;
757 struct hlist_head *list;
758
759 err = -EINVAL;
760 if (sunaddr->sun_family != AF_UNIX)
761 goto out;
762
763 if (addr_len==sizeof(short)) {
764 err = unix_autobind(sock);
765 goto out;
766 }
767
768 err = unix_mkname(sunaddr, addr_len, &hash);
769 if (err < 0)
770 goto out;
771 addr_len = err;
772
57b47a53 773 mutex_lock(&u->readlock);
774
775 err = -EINVAL;
776 if (u->addr)
777 goto out_up;
778
779 err = -ENOMEM;
780 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
781 if (!addr)
782 goto out_up;
783
784 memcpy(addr->name, sunaddr, addr_len);
785 addr->len = addr_len;
786 addr->hash = hash ^ sk->sk_type;
787 atomic_set(&addr->refcnt, 1);
788
789 if (sunaddr->sun_path[0]) {
790 unsigned int mode;
791 err = 0;
792 /*
793 * Get the parent directory, calculate the hash for last
794 * component.
795 */
796 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
797 if (err)
798 goto out_mknod_parent;
799
800 dentry = lookup_create(&nd, 0);
801 err = PTR_ERR(dentry);
802 if (IS_ERR(dentry))
803 goto out_mknod_unlock;
f81a0bff 804
805 /*
806 * All right, let's create it.
807 */
808 mode = S_IFSOCK |
809 (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
810 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
811 if (err)
812 goto out_mknod_dput;
1b1dcc1b 813 mutex_unlock(&nd.dentry->d_inode->i_mutex);
814 dput(nd.dentry);
815 nd.dentry = dentry;
816
817 addr->hash = UNIX_HASH_SIZE;
818 }
819
fbe9cc4a 820 spin_lock(&unix_table_lock);
821
822 if (!sunaddr->sun_path[0]) {
823 err = -EADDRINUSE;
824 if (__unix_find_socket_byname(sunaddr, addr_len,
825 sk->sk_type, hash)) {
826 unix_release_addr(addr);
827 goto out_unlock;
828 }
829
830 list = &unix_socket_table[addr->hash];
831 } else {
832 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
833 u->dentry = nd.dentry;
834 u->mnt = nd.mnt;
835 }
836
837 err = 0;
838 __unix_remove_socket(sk);
839 u->addr = addr;
840 __unix_insert_socket(list, sk);
841
842out_unlock:
fbe9cc4a 843 spin_unlock(&unix_table_lock);
1da177e4 844out_up:
57b47a53 845 mutex_unlock(&u->readlock);
846out:
847 return err;
848
849out_mknod_dput:
850 dput(dentry);
851out_mknod_unlock:
1b1dcc1b 852 mutex_unlock(&nd.dentry->d_inode->i_mutex);
853 path_release(&nd);
854out_mknod_parent:
855 if (err==-EEXIST)
856 err=-EADDRINUSE;
857 unix_release_addr(addr);
858 goto out_up;
859}
860
861static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
862 int alen, int flags)
863{
864 struct sock *sk = sock->sk;
865 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
866 struct sock *other;
867 unsigned hash;
868 int err;
869
870 if (addr->sa_family != AF_UNSPEC) {
871 err = unix_mkname(sunaddr, alen, &hash);
872 if (err < 0)
873 goto out;
874 alen = err;
875
876 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
877 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
878 goto out;
879
880 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
881 if (!other)
882 goto out;
883
884 unix_state_wlock(sk);
885
886 err = -EPERM;
887 if (!unix_may_send(sk, other))
888 goto out_unlock;
889
890 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
891 if (err)
892 goto out_unlock;
893
894 } else {
895 /*
896 * 1003.1g breaking connected state with AF_UNSPEC
897 */
898 other = NULL;
899 unix_state_wlock(sk);
900 }
901
902 /*
903 * If it was connected, reconnect.
904 */
905 if (unix_peer(sk)) {
906 struct sock *old_peer = unix_peer(sk);
907 unix_peer(sk)=other;
908 unix_state_wunlock(sk);
909
910 if (other != old_peer)
911 unix_dgram_disconnected(sk, old_peer);
912 sock_put(old_peer);
913 } else {
914 unix_peer(sk)=other;
915 unix_state_wunlock(sk);
916 }
917 return 0;
918
919out_unlock:
920 unix_state_wunlock(sk);
921 sock_put(other);
922out:
923 return err;
924}
925
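/*
 * Put the sender to sleep on the peer's peer_wait queue while the peer's
 * receive queue is over its backlog limit, until the timeout expires or
 * a signal arrives.
 */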
926static long unix_wait_for_peer(struct sock *other, long timeo)
927{
928 struct unix_sock *u = unix_sk(other);
929 int sched;
930 DEFINE_WAIT(wait);
931
932 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
933
934 sched = !sock_flag(other, SOCK_DEAD) &&
935 !(other->sk_shutdown & RCV_SHUTDOWN) &&
936 (skb_queue_len(&other->sk_receive_queue) >
937 other->sk_max_ack_backlog);
938
939 unix_state_runlock(other);
940
941 if (sched)
942 timeo = schedule_timeout(timeo);
943
944 finish_wait(&u->peer_wait, &wait);
945 return timeo;
946}
947
948static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
949 int addr_len, int flags)
950{
951 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
952 struct sock *sk = sock->sk;
953 struct unix_sock *u = unix_sk(sk), *newu, *otheru;
954 struct sock *newsk = NULL;
955 struct sock *other = NULL;
956 struct sk_buff *skb = NULL;
957 unsigned hash;
958 int st;
959 int err;
960 long timeo;
961
962 err = unix_mkname(sunaddr, addr_len, &hash);
963 if (err < 0)
964 goto out;
965 addr_len = err;
966
967 if (test_bit(SOCK_PASSCRED, &sock->flags)
968 && !u->addr && (err = unix_autobind(sock)) != 0)
969 goto out;
970
971 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
972
	/* First of all allocate resources.
	   If we do it after the state is locked,
	   we will have to recheck everything again in any case.
	 */
977
978 err = -ENOMEM;
979
980 /* create new sock for complete connection */
981 newsk = unix_create1(NULL);
982 if (newsk == NULL)
983 goto out;
984
985 /* Allocate skb for sending to listening sock */
986 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
987 if (skb == NULL)
988 goto out;
989
990restart:
991 /* Find listening sock. */
992 other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
993 if (!other)
994 goto out;
995
996 /* Latch state of peer */
997 unix_state_rlock(other);
998
999 /* Apparently VFS overslept socket death. Retry. */
1000 if (sock_flag(other, SOCK_DEAD)) {
1001 unix_state_runlock(other);
1002 sock_put(other);
1003 goto restart;
1004 }
1005
1006 err = -ECONNREFUSED;
1007 if (other->sk_state != TCP_LISTEN)
1008 goto out_unlock;
1009
1010 if (skb_queue_len(&other->sk_receive_queue) >
1011 other->sk_max_ack_backlog) {
1012 err = -EAGAIN;
1013 if (!timeo)
1014 goto out_unlock;
1015
1016 timeo = unix_wait_for_peer(other, timeo);
1017
1018 err = sock_intr_errno(timeo);
1019 if (signal_pending(current))
1020 goto out;
1021 sock_put(other);
1022 goto restart;
1023 }
1024
	/* Latch our state.

	   This is a tricky place. We need to grab our write lock and cannot
	   drop the lock on the peer. It is dangerous because a deadlock is
	   possible. The connect-to-self case and simultaneous
	   connect attempts are eliminated by checking the socket
	   state: other is TCP_LISTEN, and if sk is TCP_LISTEN we
	   check this before attempting to grab the lock.

	   Well, and we have to recheck the state after the socket is locked.
	 */
1036 st = sk->sk_state;
1037
1038 switch (st) {
1039 case TCP_CLOSE:
1040 /* This is ok... continue with connect */
1041 break;
1042 case TCP_ESTABLISHED:
1043 /* Socket is already connected */
1044 err = -EISCONN;
1045 goto out_unlock;
1046 default:
1047 err = -EINVAL;
1048 goto out_unlock;
1049 }
1050
a09785a2 1051 unix_state_wlock_nested(sk);
1052
1053 if (sk->sk_state != st) {
1054 unix_state_wunlock(sk);
1055 unix_state_runlock(other);
1056 sock_put(other);
1057 goto restart;
1058 }
1059
1060 err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1061 if (err) {
1062 unix_state_wunlock(sk);
1063 goto out_unlock;
1064 }
1065
	/* The way is open! Quickly set all the necessary fields... */
1067
1068 sock_hold(sk);
1069 unix_peer(newsk) = sk;
1070 newsk->sk_state = TCP_ESTABLISHED;
1071 newsk->sk_type = sk->sk_type;
1072 newsk->sk_peercred.pid = current->tgid;
1073 newsk->sk_peercred.uid = current->euid;
1074 newsk->sk_peercred.gid = current->egid;
1075 newu = unix_sk(newsk);
1076 newsk->sk_sleep = &newu->peer_wait;
1077 otheru = unix_sk(other);
1078
1079 /* copy address information from listening to new sock*/
1080 if (otheru->addr) {
1081 atomic_inc(&otheru->addr->refcnt);
1082 newu->addr = otheru->addr;
1083 }
1084 if (otheru->dentry) {
1085 newu->dentry = dget(otheru->dentry);
1086 newu->mnt = mntget(otheru->mnt);
1087 }
1088
1089 /* Set credentials */
1090 sk->sk_peercred = other->sk_peercred;
1091
1092 sock->state = SS_CONNECTED;
1093 sk->sk_state = TCP_ESTABLISHED;
1094 sock_hold(newsk);
1095
1096 smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1097 unix_peer(sk) = newsk;
1098
1099 unix_state_wunlock(sk);
1100
	/* take ten and send info to listening sock */
1102 spin_lock(&other->sk_receive_queue.lock);
1103 __skb_queue_tail(&other->sk_receive_queue, skb);
1104 /* Undo artificially decreased inflight after embrion
1105 * is installed to listening socket. */
1106 atomic_inc(&newu->inflight);
1107 spin_unlock(&other->sk_receive_queue.lock);
1108 unix_state_runlock(other);
1109 other->sk_data_ready(other, 0);
1110 sock_put(other);
1111 return 0;
1112
1113out_unlock:
1114 if (other)
1115 unix_state_runlock(other);
1116
1117out:
1118 if (skb)
1119 kfree_skb(skb);
1120 if (newsk)
1121 unix_release_sock(newsk, 0);
1122 if (other)
1123 sock_put(other);
1124 return err;
1125}
1126
1127static int unix_socketpair(struct socket *socka, struct socket *sockb)
1128{
1129 struct sock *ska=socka->sk, *skb = sockb->sk;
1130
1131 /* Join our sockets back to back */
1132 sock_hold(ska);
1133 sock_hold(skb);
1134 unix_peer(ska)=skb;
1135 unix_peer(skb)=ska;
1136 ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1137 ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1138 ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1139
1140 if (ska->sk_type != SOCK_DGRAM) {
1141 ska->sk_state = TCP_ESTABLISHED;
1142 skb->sk_state = TCP_ESTABLISHED;
1143 socka->state = SS_CONNECTED;
1144 sockb->state = SS_CONNECTED;
1145 }
1146 return 0;
1147}
1148
1149static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1150{
1151 struct sock *sk = sock->sk;
1152 struct sock *tsk;
1153 struct sk_buff *skb;
1154 int err;
1155
1156 err = -EOPNOTSUPP;
1157 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1158 goto out;
1159
1160 err = -EINVAL;
1161 if (sk->sk_state != TCP_LISTEN)
1162 goto out;
1163
1164 /* If socket state is TCP_LISTEN it cannot change (for now...),
1165 * so that no locks are necessary.
1166 */
1167
1168 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1169 if (!skb) {
1170 /* This means receive shutdown. */
1171 if (err == 0)
1172 err = -EINVAL;
1173 goto out;
1174 }
1175
1176 tsk = skb->sk;
1177 skb_free_datagram(sk, skb);
1178 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1179
1180 /* attach accepted sock to socket */
1181 unix_state_wlock(tsk);
1182 newsock->state = SS_CONNECTED;
1183 sock_graft(tsk, newsock);
1184 unix_state_wunlock(tsk);
1185 return 0;
1186
1187out:
1188 return err;
1189}
1190
1191
1192static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1193{
1194 struct sock *sk = sock->sk;
1195 struct unix_sock *u;
1196 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1197 int err = 0;
1198
1199 if (peer) {
1200 sk = unix_peer_get(sk);
1201
1202 err = -ENOTCONN;
1203 if (!sk)
1204 goto out;
1205 err = 0;
1206 } else {
1207 sock_hold(sk);
1208 }
1209
1210 u = unix_sk(sk);
1211 unix_state_rlock(sk);
1212 if (!u->addr) {
1213 sunaddr->sun_family = AF_UNIX;
1214 sunaddr->sun_path[0] = 0;
1215 *uaddr_len = sizeof(short);
1216 } else {
1217 struct unix_address *addr = u->addr;
1218
1219 *uaddr_len = addr->len;
1220 memcpy(sunaddr, addr->name, *uaddr_len);
1221 }
1222 unix_state_runlock(sk);
1223 sock_put(sk);
1224out:
1225 return err;
1226}
1227
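/*
 * Move the passed file references (SCM_RIGHTS) from an skb back into the
 * scm cookie and take them off the garbage collector's in-flight count;
 * unix_attach_fds() below does the reverse when a message is queued.
 */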
1228static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1229{
1230 int i;
1231
1232 scm->fp = UNIXCB(skb).fp;
1233 skb->destructor = sock_wfree;
1234 UNIXCB(skb).fp = NULL;
1235
1236 for (i=scm->fp->count-1; i>=0; i--)
1237 unix_notinflight(scm->fp->fp[i]);
1238}
1239
1240static void unix_destruct_fds(struct sk_buff *skb)
1241{
1242 struct scm_cookie scm;
1243 memset(&scm, 0, sizeof(scm));
1244 unix_detach_fds(&scm, skb);
1245
1246 /* Alas, it calls VFS */
1247 /* So fscking what? fput() had been SMP-safe since the last Summer */
1248 scm_destroy(&scm);
1249 sock_wfree(skb);
1250}
1251
1252static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1253{
1254 int i;
1255 for (i=scm->fp->count-1; i>=0; i--)
1256 unix_inflight(scm->fp->fp[i]);
1257 UNIXCB(skb).fp = scm->fp;
1258 skb->destructor = unix_destruct_fds;
1259 scm->fp = NULL;
1260}
1261
1262/*
1263 * Send AF_UNIX data.
1264 */
1265
1266static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1267 struct msghdr *msg, size_t len)
1268{
1269 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1270 struct sock *sk = sock->sk;
1271 struct unix_sock *u = unix_sk(sk);
1272 struct sockaddr_un *sunaddr=msg->msg_name;
1273 struct sock *other = NULL;
1274 int namelen = 0; /* fake GCC */
1275 int err;
1276 unsigned hash;
1277 struct sk_buff *skb;
1278 long timeo;
1279 struct scm_cookie tmp_scm;
1280
1281 if (NULL == siocb->scm)
1282 siocb->scm = &tmp_scm;
1283 err = scm_send(sock, msg, siocb->scm);
1284 if (err < 0)
1285 return err;
1286
1287 err = -EOPNOTSUPP;
1288 if (msg->msg_flags&MSG_OOB)
1289 goto out;
1290
1291 if (msg->msg_namelen) {
1292 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1293 if (err < 0)
1294 goto out;
1295 namelen = err;
1296 } else {
1297 sunaddr = NULL;
1298 err = -ENOTCONN;
1299 other = unix_peer_get(sk);
1300 if (!other)
1301 goto out;
1302 }
1303
1304 if (test_bit(SOCK_PASSCRED, &sock->flags)
1305 && !u->addr && (err = unix_autobind(sock)) != 0)
1306 goto out;
1307
1308 err = -EMSGSIZE;
1309 if (len > sk->sk_sndbuf - 32)
1310 goto out;
1311
1312 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1313 if (skb==NULL)
1314 goto out;
1315
1316 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1317 if (siocb->scm->fp)
1318 unix_attach_fds(siocb->scm, skb);
dc49c1f9 1319 unix_get_secdata(siocb->scm, skb);
877ce7c1 1320
1321 skb->h.raw = skb->data;
1322 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1323 if (err)
1324 goto out_free;
1325
1326 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1327
1328restart:
1329 if (!other) {
1330 err = -ECONNRESET;
1331 if (sunaddr == NULL)
1332 goto out_free;
1333
1334 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1335 hash, &err);
1336 if (other==NULL)
1337 goto out_free;
1338 }
1339
1340 unix_state_rlock(other);
1341 err = -EPERM;
1342 if (!unix_may_send(sk, other))
1343 goto out_unlock;
1344
1345 if (sock_flag(other, SOCK_DEAD)) {
1346 /*
1347 * Check with 1003.1g - what should
1348 * datagram error
1349 */
1350 unix_state_runlock(other);
1351 sock_put(other);
1352
1353 err = 0;
1354 unix_state_wlock(sk);
1355 if (unix_peer(sk) == other) {
1356 unix_peer(sk)=NULL;
1357 unix_state_wunlock(sk);
1358
1359 unix_dgram_disconnected(sk, other);
1360 sock_put(other);
1361 err = -ECONNREFUSED;
1362 } else {
1363 unix_state_wunlock(sk);
1364 }
1365
1366 other = NULL;
1367 if (err)
1368 goto out_free;
1369 goto restart;
1370 }
1371
1372 err = -EPIPE;
1373 if (other->sk_shutdown & RCV_SHUTDOWN)
1374 goto out_unlock;
1375
1376 if (sk->sk_type != SOCK_SEQPACKET) {
1377 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1378 if (err)
1379 goto out_unlock;
1380 }
1381
1382 if (unix_peer(other) != sk &&
1383 (skb_queue_len(&other->sk_receive_queue) >
1384 other->sk_max_ack_backlog)) {
1385 if (!timeo) {
1386 err = -EAGAIN;
1387 goto out_unlock;
1388 }
1389
1390 timeo = unix_wait_for_peer(other, timeo);
1391
1392 err = sock_intr_errno(timeo);
1393 if (signal_pending(current))
1394 goto out_free;
1395
1396 goto restart;
1397 }
1398
1399 skb_queue_tail(&other->sk_receive_queue, skb);
1400 unix_state_runlock(other);
1401 other->sk_data_ready(other, len);
1402 sock_put(other);
1403 scm_destroy(siocb->scm);
1404 return len;
1405
1406out_unlock:
1407 unix_state_runlock(other);
1408out_free:
1409 kfree_skb(skb);
1410out:
1411 if (other)
1412 sock_put(other);
1413 scm_destroy(siocb->scm);
1414 return err;
1415}
1416
1417
1418static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1419 struct msghdr *msg, size_t len)
1420{
1421 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1422 struct sock *sk = sock->sk;
1423 struct sock *other = NULL;
1424 struct sockaddr_un *sunaddr=msg->msg_name;
1425 int err,size;
1426 struct sk_buff *skb;
1427 int sent=0;
1428 struct scm_cookie tmp_scm;
1429
1430 if (NULL == siocb->scm)
1431 siocb->scm = &tmp_scm;
1432 err = scm_send(sock, msg, siocb->scm);
1433 if (err < 0)
1434 return err;
1435
1436 err = -EOPNOTSUPP;
1437 if (msg->msg_flags&MSG_OOB)
1438 goto out_err;
1439
1440 if (msg->msg_namelen) {
1441 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1442 goto out_err;
1443 } else {
1444 sunaddr = NULL;
1445 err = -ENOTCONN;
830a1e5c 1446 other = unix_peer(sk);
1447 if (!other)
1448 goto out_err;
1449 }
1450
1451 if (sk->sk_shutdown & SEND_SHUTDOWN)
1452 goto pipe_err;
1453
1454 while(sent < len)
1455 {
1456 /*
1457 * Optimisation for the fact that under 0.01% of X
1458 * messages typically need breaking up.
1459 */
1460
e9df7d7f 1461 size = len-sent;
1462
1463 /* Keep two messages in the pipe so it schedules better */
1464 if (size > ((sk->sk_sndbuf >> 1) - 64))
1465 size = (sk->sk_sndbuf >> 1) - 64;
1466
1467 if (size > SKB_MAX_ALLOC)
1468 size = SKB_MAX_ALLOC;
1469
1470 /*
1471 * Grab a buffer
1472 */
1473
1474 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1475
1476 if (skb==NULL)
1477 goto out_err;
1478
1479 /*
1480 * If you pass two values to the sock_alloc_send_skb
1481 * it tries to grab the large buffer with GFP_NOFS
1482 * (which can fail easily), and if it fails grab the
1483 * fallback size buffer which is under a page and will
1484 * succeed. [Alan]
1485 */
1486 size = min_t(int, size, skb_tailroom(skb));
1487
1488 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1489 if (siocb->scm->fp)
1490 unix_attach_fds(siocb->scm, skb);
1491
1492 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1493 kfree_skb(skb);
1494 goto out_err;
1495 }
1496
1497 unix_state_rlock(other);
1498
1499 if (sock_flag(other, SOCK_DEAD) ||
1500 (other->sk_shutdown & RCV_SHUTDOWN))
1501 goto pipe_err_free;
1502
1503 skb_queue_tail(&other->sk_receive_queue, skb);
1504 unix_state_runlock(other);
1505 other->sk_data_ready(other, size);
1506 sent+=size;
1507 }
1508
1509 scm_destroy(siocb->scm);
1510 siocb->scm = NULL;
1511
1512 return sent;
1513
1514pipe_err_free:
1515 unix_state_runlock(other);
1516 kfree_skb(skb);
1517pipe_err:
1518 if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1519 send_sig(SIGPIPE,current,0);
1520 err = -EPIPE;
1521out_err:
1522 scm_destroy(siocb->scm);
1523 siocb->scm = NULL;
1524 return sent ? : err;
1525}
1526
1527static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1528 struct msghdr *msg, size_t len)
1529{
1530 int err;
1531 struct sock *sk = sock->sk;
1532
1533 err = sock_error(sk);
1534 if (err)
1535 return err;
1536
1537 if (sk->sk_state != TCP_ESTABLISHED)
1538 return -ENOTCONN;
1539
1540 if (msg->msg_namelen)
1541 msg->msg_namelen = 0;
1542
1543 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1544}
1545
1546static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1547{
1548 struct unix_sock *u = unix_sk(sk);
1549
1550 msg->msg_namelen = 0;
1551 if (u->addr) {
1552 msg->msg_namelen = u->addr->len;
1553 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1554 }
1555}
1556
1557static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1558 struct msghdr *msg, size_t size,
1559 int flags)
1560{
1561 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1562 struct scm_cookie tmp_scm;
1563 struct sock *sk = sock->sk;
1564 struct unix_sock *u = unix_sk(sk);
1565 int noblock = flags & MSG_DONTWAIT;
1566 struct sk_buff *skb;
1567 int err;
1568
1569 err = -EOPNOTSUPP;
1570 if (flags&MSG_OOB)
1571 goto out;
1572
1573 msg->msg_namelen = 0;
1574
57b47a53 1575 mutex_lock(&u->readlock);
1576
1577 skb = skb_recv_datagram(sk, flags, noblock, &err);
1578 if (!skb)
1579 goto out_unlock;
1580
1581 wake_up_interruptible(&u->peer_wait);
1582
1583 if (msg->msg_name)
1584 unix_copy_addr(msg, skb->sk);
1585
1586 if (size > skb->len)
1587 size = skb->len;
1588 else if (size < skb->len)
1589 msg->msg_flags |= MSG_TRUNC;
1590
1591 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1592 if (err)
1593 goto out_free;
1594
1595 if (!siocb->scm) {
1596 siocb->scm = &tmp_scm;
1597 memset(&tmp_scm, 0, sizeof(tmp_scm));
1598 }
1599 siocb->scm->creds = *UNIXCREDS(skb);
877ce7c1 1600 unix_set_secdata(siocb->scm, skb);
1601
1602 if (!(flags & MSG_PEEK))
1603 {
1604 if (UNIXCB(skb).fp)
1605 unix_detach_fds(siocb->scm, skb);
1606 }
1607 else
1608 {
1609 /* It is questionable: on PEEK we could:
1610 - do not return fds - good, but too simple 8)
1611 - return fds, and do not return them on read (old strategy,
1612 apparently wrong)
1613 - clone fds (I chose it for now, it is the most universal
1614 solution)
1615
1616 POSIX 1003.1g does not actually define this clearly
1617 at all. POSIX 1003.1g doesn't define a lot of things
1618 clearly however!
1619
1620 */
1621 if (UNIXCB(skb).fp)
1622 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1623 }
1624 err = size;
1625
1626 scm_recv(sock, msg, siocb->scm, flags);
1627
1628out_free:
1629 skb_free_datagram(sk,skb);
1630out_unlock:
57b47a53 1631 mutex_unlock(&u->readlock);
1632out:
1633 return err;
1634}
1635
/*
 *	Sleep until data has arrived. But check for races..
 */
1639
1640static long unix_stream_data_wait(struct sock * sk, long timeo)
1641{
1642 DEFINE_WAIT(wait);
1643
1644 unix_state_rlock(sk);
1645
1646 for (;;) {
1647 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1648
b03efcfb 1649 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1650 sk->sk_err ||
1651 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1652 signal_pending(current) ||
1653 !timeo)
1654 break;
1655
1656 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1657 unix_state_runlock(sk);
1658 timeo = schedule_timeout(timeo);
1659 unix_state_rlock(sk);
1660 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1661 }
1662
1663 finish_wait(sk->sk_sleep, &wait);
1664 unix_state_runlock(sk);
1665 return timeo;
1666}
1667
1668
1669
1670static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1671 struct msghdr *msg, size_t size,
1672 int flags)
1673{
1674 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1675 struct scm_cookie tmp_scm;
1676 struct sock *sk = sock->sk;
1677 struct unix_sock *u = unix_sk(sk);
1678 struct sockaddr_un *sunaddr=msg->msg_name;
1679 int copied = 0;
1680 int check_creds = 0;
1681 int target;
1682 int err = 0;
1683 long timeo;
1684
1685 err = -EINVAL;
1686 if (sk->sk_state != TCP_ESTABLISHED)
1687 goto out;
1688
1689 err = -EOPNOTSUPP;
1690 if (flags&MSG_OOB)
1691 goto out;
1692
1693 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1694 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1695
1696 msg->msg_namelen = 0;
1697
	/* Lock the socket to prevent queue disordering
	 * while we sleep in memcpy_tomsg
	 */
1701
1702 if (!siocb->scm) {
1703 siocb->scm = &tmp_scm;
1704 memset(&tmp_scm, 0, sizeof(tmp_scm));
1705 }
1706
57b47a53 1707 mutex_lock(&u->readlock);
1708
1709 do
1710 {
1711 int chunk;
1712 struct sk_buff *skb;
1713
1714 skb = skb_dequeue(&sk->sk_receive_queue);
1715 if (skb==NULL)
1716 {
1717 if (copied >= target)
1718 break;
1719
1720 /*
1721 * POSIX 1003.1g mandates this order.
1722 */
1723
1724 if ((err = sock_error(sk)) != 0)
1725 break;
1726 if (sk->sk_shutdown & RCV_SHUTDOWN)
1727 break;
1728 err = -EAGAIN;
1729 if (!timeo)
1730 break;
57b47a53 1731 mutex_unlock(&u->readlock);
1732
1733 timeo = unix_stream_data_wait(sk, timeo);
1734
1735 if (signal_pending(current)) {
1736 err = sock_intr_errno(timeo);
1737 goto out;
1738 }
57b47a53 1739 mutex_lock(&u->readlock);
1740 continue;
1741 }
1742
1743 if (check_creds) {
1744 /* Never glue messages from different writers */
1745 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1746 skb_queue_head(&sk->sk_receive_queue, skb);
1747 break;
1748 }
1749 } else {
1750 /* Copy credentials */
1751 siocb->scm->creds = *UNIXCREDS(skb);
1752 check_creds = 1;
1753 }
1754
1755 /* Copy address just once */
1756 if (sunaddr)
1757 {
1758 unix_copy_addr(msg, skb->sk);
1759 sunaddr = NULL;
1760 }
1761
1762 chunk = min_t(unsigned int, skb->len, size);
1763 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1764 skb_queue_head(&sk->sk_receive_queue, skb);
1765 if (copied == 0)
1766 copied = -EFAULT;
1767 break;
1768 }
1769 copied += chunk;
1770 size -= chunk;
1771
1772 /* Mark read part of skb as used */
1773 if (!(flags & MSG_PEEK))
1774 {
1775 skb_pull(skb, chunk);
1776
1777 if (UNIXCB(skb).fp)
1778 unix_detach_fds(siocb->scm, skb);
1779
1780 /* put the skb back if we didn't use it up.. */
1781 if (skb->len)
1782 {
1783 skb_queue_head(&sk->sk_receive_queue, skb);
1784 break;
1785 }
1786
1787 kfree_skb(skb);
1788
1789 if (siocb->scm->fp)
1790 break;
1791 }
1792 else
1793 {
1794 /* It is questionable, see note in unix_dgram_recvmsg.
1795 */
1796 if (UNIXCB(skb).fp)
1797 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1798
1799 /* put message back and return */
1800 skb_queue_head(&sk->sk_receive_queue, skb);
1801 break;
1802 }
1803 } while (size);
1804
57b47a53 1805 mutex_unlock(&u->readlock);
1806 scm_recv(sock, msg, siocb->scm, flags);
1807out:
1808 return copied ? : err;
1809}
1810
1811static int unix_shutdown(struct socket *sock, int mode)
1812{
1813 struct sock *sk = sock->sk;
1814 struct sock *other;
1815
1816 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1817
1818 if (mode) {
1819 unix_state_wlock(sk);
1820 sk->sk_shutdown |= mode;
1821 other=unix_peer(sk);
1822 if (other)
1823 sock_hold(other);
1824 unix_state_wunlock(sk);
1825 sk->sk_state_change(sk);
1826
1827 if (other &&
1828 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1829
1830 int peer_mode = 0;
1831
1832 if (mode&RCV_SHUTDOWN)
1833 peer_mode |= SEND_SHUTDOWN;
1834 if (mode&SEND_SHUTDOWN)
1835 peer_mode |= RCV_SHUTDOWN;
1836 unix_state_wlock(other);
1837 other->sk_shutdown |= peer_mode;
1838 unix_state_wunlock(other);
1839 other->sk_state_change(other);
1840 read_lock(&other->sk_callback_lock);
1841 if (peer_mode == SHUTDOWN_MASK)
1842 sk_wake_async(other,1,POLL_HUP);
1843 else if (peer_mode & RCV_SHUTDOWN)
1844 sk_wake_async(other,1,POLL_IN);
1845 read_unlock(&other->sk_callback_lock);
1846 }
1847 if (other)
1848 sock_put(other);
1849 }
1850 return 0;
1851}
1852
1853static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1854{
1855 struct sock *sk = sock->sk;
1856 long amount=0;
1857 int err;
1858
1859 switch(cmd)
1860 {
1861 case SIOCOUTQ:
1862 amount = atomic_read(&sk->sk_wmem_alloc);
1863 err = put_user(amount, (int __user *)arg);
1864 break;
1865 case SIOCINQ:
1866 {
1867 struct sk_buff *skb;
1868
1869 if (sk->sk_state == TCP_LISTEN) {
1870 err = -EINVAL;
1871 break;
1872 }
1873
1874 spin_lock(&sk->sk_receive_queue.lock);
1875 if (sk->sk_type == SOCK_STREAM ||
1876 sk->sk_type == SOCK_SEQPACKET) {
1877 skb_queue_walk(&sk->sk_receive_queue, skb)
1878 amount += skb->len;
1879 } else {
1880 skb = skb_peek(&sk->sk_receive_queue);
1881 if (skb)
1882 amount=skb->len;
1883 }
1884 spin_unlock(&sk->sk_receive_queue.lock);
1885 err = put_user(amount, (int __user *)arg);
1886 break;
1887 }
1888
1889 default:
b5e5fa5e 1890 err = -ENOIOCTLCMD;
1891 break;
1892 }
1893 return err;
1894}
1895
1896static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1897{
1898 struct sock *sk = sock->sk;
1899 unsigned int mask;
1900
1901 poll_wait(file, sk->sk_sleep, wait);
1902 mask = 0;
1903
1904 /* exceptional events? */
1905 if (sk->sk_err)
1906 mask |= POLLERR;
1907 if (sk->sk_shutdown == SHUTDOWN_MASK)
1908 mask |= POLLHUP;
1909 if (sk->sk_shutdown & RCV_SHUTDOWN)
1910 mask |= POLLRDHUP;
1911
1912 /* readable? */
1913 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1914 (sk->sk_shutdown & RCV_SHUTDOWN))
1915 mask |= POLLIN | POLLRDNORM;
1916
1917 /* Connection-based need to check for termination and startup */
1918 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1919 mask |= POLLHUP;
1920
1921 /*
1922 * we set writable also when the other side has shut down the
1923 * connection. This prevents stuck sockets.
1924 */
1925 if (unix_writable(sk))
1926 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1927
1928 return mask;
1929}
1930
1931
1932#ifdef CONFIG_PROC_FS
1933static struct sock *unix_seq_idx(int *iter, loff_t pos)
1934{
1935 loff_t off = 0;
1936 struct sock *s;
1937
1938 for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1939 if (off == pos)
1940 return s;
1941 ++off;
1942 }
1943 return NULL;
1944}
1945
1946
1947static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1948{
fbe9cc4a 1949 spin_lock(&unix_table_lock);
1950 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1951}
1952
1953static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1954{
1955 ++*pos;
1956
1957 if (v == (void *)1)
1958 return first_unix_socket(seq->private);
1959 return next_unix_socket(seq->private, v);
1960}
1961
1962static void unix_seq_stop(struct seq_file *seq, void *v)
1963{
fbe9cc4a 1964 spin_unlock(&unix_table_lock);
1965}
1966
1967static int unix_seq_show(struct seq_file *seq, void *v)
1968{
1969
1970 if (v == (void *)1)
1971 seq_puts(seq, "Num RefCount Protocol Flags Type St "
1972 "Inode Path\n");
1973 else {
1974 struct sock *s = v;
1975 struct unix_sock *u = unix_sk(s);
1976 unix_state_rlock(s);
1977
1978 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1979 s,
1980 atomic_read(&s->sk_refcnt),
1981 0,
1982 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1983 s->sk_type,
1984 s->sk_socket ?
1985 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1986 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1987 sock_i_ino(s));
1988
1989 if (u->addr) {
1990 int i, len;
1991 seq_putc(seq, ' ');
1992
1993 i = 0;
1994 len = u->addr->len - sizeof(short);
1995 if (!UNIX_ABSTRACT(s))
1996 len--;
1997 else {
1998 seq_putc(seq, '@');
1999 i++;
2000 }
2001 for ( ; i < len; i++)
2002 seq_putc(seq, u->addr->name->sun_path[i]);
2003 }
2004 unix_state_runlock(s);
2005 seq_putc(seq, '\n');
2006 }
2007
2008 return 0;
2009}
2010
2011static struct seq_operations unix_seq_ops = {
2012 .start = unix_seq_start,
2013 .next = unix_seq_next,
2014 .stop = unix_seq_stop,
2015 .show = unix_seq_show,
2016};
2017
2018
2019static int unix_seq_open(struct inode *inode, struct file *file)
2020{
2021 struct seq_file *seq;
2022 int rc = -ENOMEM;
2023 int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2024
2025 if (!iter)
2026 goto out;
2027
2028 rc = seq_open(file, &unix_seq_ops);
2029 if (rc)
2030 goto out_kfree;
2031
2032 seq = file->private_data;
2033 seq->private = iter;
2034 *iter = 0;
2035out:
2036 return rc;
2037out_kfree:
2038 kfree(iter);
2039 goto out;
2040}
2041
2042static struct file_operations unix_seq_fops = {
2043 .owner = THIS_MODULE,
2044 .open = unix_seq_open,
2045 .read = seq_read,
2046 .llseek = seq_lseek,
2047 .release = seq_release_private,
2048};
2049
2050#endif
2051
2052static struct net_proto_family unix_family_ops = {
2053 .family = PF_UNIX,
2054 .create = unix_create,
2055 .owner = THIS_MODULE,
2056};
2057
2058static int __init af_unix_init(void)
2059{
2060 int rc = -1;
2061 struct sk_buff *dummy_skb;
2062
2063 if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2064 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2065 goto out;
2066 }
2067
2068 rc = proto_register(&unix_proto, 1);
2069 if (rc != 0) {
2070 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2071 __FUNCTION__);
2072 goto out;
2073 }
2074
2075 sock_register(&unix_family_ops);
2076#ifdef CONFIG_PROC_FS
2077 proc_net_fops_create("unix", 0, &unix_seq_fops);
2078#endif
2079 unix_sysctl_register();
2080out:
2081 return rc;
2082}
2083
2084static void __exit af_unix_exit(void)
2085{
2086 sock_unregister(PF_UNIX);
2087 unix_sysctl_unregister();
2088 proc_net_remove("unix");
2089 proto_unregister(&unix_proto);
2090}
2091
2092module_init(af_unix_init);
2093module_exit(af_unix_exit);
2094
2095MODULE_LICENSE("GPL");
2096MODULE_ALIAS_NETPROTO(PF_UNIX);