ia64/linux-2.6.18-xen.hg

view net/unix/af_unix.c @ 871:9cbcc9008446

xen/x86: don't initialize cpu_data[]'s apicid field on generic code

Afaict, this is not only redundant with the initialization done in
drivers/xen/core/smpboot.c, but actually results - at least for
secondary CPUs - in the Xen-specific value written to be later
overwritten with whatever the generic code determines (with no
guarantee that the two values are identical).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu May 14 10:09:15 2009 +0100 (2009-05-14)
parents 831230e53067
children
line source
1 /*
2 * NET4: Implementation of BSD Unix domain sockets.
3 *
4 * Authors: Alan Cox, <alan.cox@linux.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12 *
13 * Fixes:
14 * Linus Torvalds : Assorted bug cures.
15 * Niibe Yutaka : async I/O support.
16 * Carsten Paeth : PF_UNIX check, address fixes.
17 * Alan Cox : Limit size of allocated blocks.
18 * Alan Cox : Fixed the stupid socketpair bug.
19 * Alan Cox : BSD compatibility fine tuning.
20 * Alan Cox : Fixed a bug in connect when interrupted.
21 * Alan Cox : Sorted out a proper draft version of
22 * file descriptor passing hacked up from
23 * Mike Shaver's work.
24 * Marty Leisner : Fixes to fd passing
25 * Nick Nevin : recvmsg bugfix.
26 * Alan Cox : Started proper garbage collector
27 * Heiko EiBfeldt : Missing verify_area check
28 * Alan Cox : Started POSIXisms
29 * Andreas Schwab : Replace inode by dentry for proper
30 * reference counting
31 * Kirk Petersen : Made this a module
32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
33 * Lots of bug fixes.
34 * Alexey Kuznetosv : Repaired (I hope) bugs introduces
35 * by above two patches.
36 * Andrea Arcangeli : If possible we block in connect(2)
37 * if the max backlog of the listen socket
38 * has been reached. This won't break
39 * old apps and it will avoid huge amount
40 * of socks hashed (this for unix_gc()
41 * performances reasons).
42 * Security fix that limits the max
43 * number of socks to 2*max_files and
44 * the number of skb queueable in the
45 * dgram receiver.
46 * Artur Skawina : Hash function optimizations
47 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
48 * Malcolm Beattie : Set peercred for socketpair
49 * Michal Ostrowski : Module initialization cleanup.
50 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
51 * the core infrastructure is doing that
52 * for all net proto families now (2.5.69+)
53 *
54 *
55 * Known differences from reference BSD that was tested:
56 *
57 * [TO FIX]
58 * ECONNREFUSED is not returned from one end of a connected() socket to the
59 * other the moment one end closes.
60 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
61 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
62 * [NOT TO FIX]
63 * accept() returns a path name even if the connecting socket has closed
64 * in the meantime (BSD loses the path and gives up).
65 * accept() returns 0 length path for an unbound connector. BSD returns 16
66 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
68 * BSD af_unix apparently has connect forgetting to block properly.
69 * (need to check this with the POSIX spec in detail)
70 *
71 * Differences from 2.0.0-11-... (ANK)
72 * Bug fixes and improvements.
73 * - client shutdown killed server socket.
74 * - removed all useless cli/sti pairs.
75 *
76 * Semantic changes/extensions.
77 * - generic control message passing.
78 * - SCM_CREDENTIALS control message.
79 * - "Abstract" (not FS based) socket bindings.
80 * Abstract names are sequences of bytes (not zero terminated)
81 * started by 0, so that this name space does not intersect
82 * with BSD names.
83 */
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/sock.h>
107 #include <net/tcp_states.h>
108 #include <net/af_unix.h>
109 #include <linux/proc_fs.h>
110 #include <linux/seq_file.h>
111 #include <net/scm.h>
112 #include <linux/init.h>
113 #include <linux/poll.h>
114 #include <linux/smp_lock.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
120 int sysctl_unix_max_dgram_qlen = 10;
122 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
123 DEFINE_SPINLOCK(unix_table_lock);
124 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
126 #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
128 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
#ifdef CONFIG_SECURITY_NETWORK
/*
 * Copy the security ID held in the control-message cookie into the
 * skb's UNIXSID() slot.
 */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/*
 * Load the security ID stored in the skb's UNIXSID() slot into the
 * control-message cookie.
 */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers compile to nothing. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }
#endif /* CONFIG_SECURITY_NETWORK */
148 /*
149 * SMP locking strategy:
150 * hash table is protected with spinlock unix_table_lock
151 * each socket state is protected by separate rwlock.
152 */
154 static inline unsigned unix_hash_fold(unsigned hash)
155 {
156 hash ^= hash>>16;
157 hash ^= hash>>8;
158 return hash&(UNIX_HASH_SIZE-1);
159 }
161 #define unix_peer(sk) (unix_sk(sk)->peer)
163 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
164 {
165 return unix_peer(osk) == sk;
166 }
/*
 * @sk may send to @osk when @osk has no peer at all, or when its
 * peer is @sk itself.
 */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return !unix_peer(osk) || unix_our_peer(sk, osk);
}
173 static struct sock *unix_peer_get(struct sock *s)
174 {
175 struct sock *peer;
177 unix_state_rlock(s);
178 peer = unix_peer(s);
179 if (peer)
180 sock_hold(peer);
181 unix_state_runlock(s);
182 return peer;
183 }
185 static inline void unix_release_addr(struct unix_address *addr)
186 {
187 if (atomic_dec_and_test(&addr->refcnt))
188 kfree(addr);
189 }
191 /*
192 * Check unix socket name:
193 * - should be not zero length.
194 * - if started by not zero, should be NULL terminated (FS object)
195 * - if started by zero, it is abstract name.
196 */
198 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
199 {
200 if (len <= sizeof(short) || len > sizeof(*sunaddr))
201 return -EINVAL;
202 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
203 return -EINVAL;
204 if (sunaddr->sun_path[0]) {
205 /*
206 * This may look like an off by one error but it is a bit more
207 * subtle. 108 is the longest valid AF_UNIX path for a binding.
208 * sun_path[108] doesnt as such exist. However in kernel space
209 * we are guaranteed that it is a valid memory location in our
210 * kernel address buffer.
211 */
212 ((char *)sunaddr)[len]=0;
213 len = strlen(sunaddr->sun_path)+1+sizeof(short);
214 return len;
215 }
217 *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
218 return len;
219 }
/*
 * Unlink @sk from its hash chain.  The locked wrapper
 * unix_remove_socket() takes unix_table_lock around this call.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
/*
 * Link @sk into the chain @list.  The socket is expected to be
 * unhashed at this point (BUG_TRAP otherwise).  The locked wrapper
 * unix_insert_socket() takes unix_table_lock around this call.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));
	sk_add_node(sk, list);
}
232 static inline void unix_remove_socket(struct sock *sk)
233 {
234 spin_lock(&unix_table_lock);
235 __unix_remove_socket(sk);
236 spin_unlock(&unix_table_lock);
237 }
239 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
240 {
241 spin_lock(&unix_table_lock);
242 __unix_insert_socket(list, sk);
243 spin_unlock(&unix_table_lock);
244 }
246 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
247 int len, int type, unsigned hash)
248 {
249 struct sock *s;
250 struct hlist_node *node;
252 sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
253 struct unix_sock *u = unix_sk(s);
255 if (u->addr->len == len &&
256 !memcmp(u->addr->name, sunname, len))
257 goto found;
258 }
259 s = NULL;
260 found:
261 return s;
262 }
264 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
265 int len, int type,
266 unsigned hash)
267 {
268 struct sock *s;
270 spin_lock(&unix_table_lock);
271 s = __unix_find_socket_byname(sunname, len, type, hash);
272 if (s)
273 sock_hold(s);
274 spin_unlock(&unix_table_lock);
275 return s;
276 }
278 static struct sock *unix_find_socket_byinode(struct inode *i)
279 {
280 struct sock *s;
281 struct hlist_node *node;
283 spin_lock(&unix_table_lock);
284 sk_for_each(s, node,
285 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
286 struct dentry *dentry = unix_sk(s)->dentry;
288 if(dentry && dentry->d_inode == i)
289 {
290 sock_hold(s);
291 goto found;
292 }
293 }
294 s = NULL;
295 found:
296 spin_unlock(&unix_table_lock);
297 return s;
298 }
300 static inline int unix_writable(struct sock *sk)
301 {
302 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
303 }
305 static void unix_write_space(struct sock *sk)
306 {
307 read_lock(&sk->sk_callback_lock);
308 if (unix_writable(sk)) {
309 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
310 wake_up_interruptible(sk->sk_sleep);
311 sk_wake_async(sk, 2, POLL_OUT);
312 }
313 read_unlock(&sk->sk_callback_lock);
314 }
316 /* When dgram socket disconnects (or changes its peer), we clear its receive
317 * queue of packets arrived from previous peer. First, it allows to do
318 * flow control based only on wmem_alloc; second, sk connected to peer
319 * may receive messages only from that peer. */
320 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
321 {
322 if (!skb_queue_empty(&sk->sk_receive_queue)) {
323 skb_queue_purge(&sk->sk_receive_queue);
324 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
326 /* If one link of bidirectional dgram pipe is disconnected,
327 * we signal error. Messages are lost. Do not make this,
328 * when peer was not connected to us.
329 */
330 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
331 other->sk_err = ECONNRESET;
332 other->sk_error_report(other);
333 }
334 }
335 }
337 static void unix_sock_destructor(struct sock *sk)
338 {
339 struct unix_sock *u = unix_sk(sk);
341 skb_queue_purge(&sk->sk_receive_queue);
343 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
344 BUG_TRAP(sk_unhashed(sk));
345 BUG_TRAP(!sk->sk_socket);
346 if (!sock_flag(sk, SOCK_DEAD)) {
347 printk("Attempt to release alive unix socket: %p\n", sk);
348 return;
349 }
351 if (u->addr)
352 unix_release_addr(u->addr);
354 atomic_dec(&unix_nr_socks);
355 #ifdef UNIX_REFCNT_DEBUG
356 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
357 #endif
358 }
360 static int unix_release_sock (struct sock *sk, int embrion)
361 {
362 struct unix_sock *u = unix_sk(sk);
363 struct dentry *dentry;
364 struct vfsmount *mnt;
365 struct sock *skpair;
366 struct sk_buff *skb;
367 int state;
369 unix_remove_socket(sk);
371 /* Clear state */
372 unix_state_wlock(sk);
373 sock_orphan(sk);
374 sk->sk_shutdown = SHUTDOWN_MASK;
375 dentry = u->dentry;
376 u->dentry = NULL;
377 mnt = u->mnt;
378 u->mnt = NULL;
379 state = sk->sk_state;
380 sk->sk_state = TCP_CLOSE;
381 unix_state_wunlock(sk);
383 wake_up_interruptible_all(&u->peer_wait);
385 skpair=unix_peer(sk);
387 if (skpair!=NULL) {
388 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
389 unix_state_wlock(skpair);
390 /* No more writes */
391 skpair->sk_shutdown = SHUTDOWN_MASK;
392 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
393 skpair->sk_err = ECONNRESET;
394 unix_state_wunlock(skpair);
395 skpair->sk_state_change(skpair);
396 read_lock(&skpair->sk_callback_lock);
397 sk_wake_async(skpair,1,POLL_HUP);
398 read_unlock(&skpair->sk_callback_lock);
399 }
400 sock_put(skpair); /* It may now die */
401 unix_peer(sk) = NULL;
402 }
404 /* Try to flush out this socket. Throw out buffers at least */
406 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
407 if (state==TCP_LISTEN)
408 unix_release_sock(skb->sk, 1);
409 /* passed fds are erased in the kfree_skb hook */
410 kfree_skb(skb);
411 }
413 if (dentry) {
414 dput(dentry);
415 mntput(mnt);
416 }
418 sock_put(sk);
420 /* ---- Socket is dead now and most probably destroyed ---- */
422 /*
423 * Fixme: BSD difference: In BSD all sockets connected to use get
424 * ECONNRESET and we die on the spot. In Linux we behave
425 * like files and pipes do and wait for the last
426 * dereference.
427 *
428 * Can't we simply set sock->err?
429 *
430 * What the above comment does talk about? --ANK(980817)
431 */
433 if (atomic_read(&unix_tot_inflight))
434 unix_gc(); /* Garbage collect fds */
436 return 0;
437 }
439 static int unix_listen(struct socket *sock, int backlog)
440 {
441 int err;
442 struct sock *sk = sock->sk;
443 struct unix_sock *u = unix_sk(sk);
445 err = -EOPNOTSUPP;
446 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
447 goto out; /* Only stream/seqpacket sockets accept */
448 err = -EINVAL;
449 if (!u->addr)
450 goto out; /* No listens on an unbound socket */
451 unix_state_wlock(sk);
452 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
453 goto out_unlock;
454 if (backlog > sk->sk_max_ack_backlog)
455 wake_up_interruptible_all(&u->peer_wait);
456 sk->sk_max_ack_backlog = backlog;
457 sk->sk_state = TCP_LISTEN;
458 /* set credentials so connect can copy them */
459 sk->sk_peercred.pid = current->tgid;
460 sk->sk_peercred.uid = current->euid;
461 sk->sk_peercred.gid = current->egid;
462 err = 0;
464 out_unlock:
465 unix_state_wunlock(sk);
466 out:
467 return err;
468 }
470 static int unix_release(struct socket *);
471 static int unix_bind(struct socket *, struct sockaddr *, int);
472 static int unix_stream_connect(struct socket *, struct sockaddr *,
473 int addr_len, int flags);
474 static int unix_socketpair(struct socket *, struct socket *);
475 static int unix_accept(struct socket *, struct socket *, int);
476 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
477 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
478 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
479 static int unix_shutdown(struct socket *, int);
480 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
481 struct msghdr *, size_t);
482 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
483 struct msghdr *, size_t, int);
484 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
485 struct msghdr *, size_t);
486 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
487 struct msghdr *, size_t, int);
488 static int unix_dgram_connect(struct socket *, struct sockaddr *,
489 int, int);
490 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
491 struct msghdr *, size_t);
493 static const struct proto_ops unix_stream_ops = {
494 .family = PF_UNIX,
495 .owner = THIS_MODULE,
496 .release = unix_release,
497 .bind = unix_bind,
498 .connect = unix_stream_connect,
499 .socketpair = unix_socketpair,
500 .accept = unix_accept,
501 .getname = unix_getname,
502 .poll = unix_poll,
503 .ioctl = unix_ioctl,
504 .listen = unix_listen,
505 .shutdown = unix_shutdown,
506 .setsockopt = sock_no_setsockopt,
507 .getsockopt = sock_no_getsockopt,
508 .sendmsg = unix_stream_sendmsg,
509 .recvmsg = unix_stream_recvmsg,
510 .mmap = sock_no_mmap,
511 .sendpage = sock_no_sendpage,
512 };
514 static const struct proto_ops unix_dgram_ops = {
515 .family = PF_UNIX,
516 .owner = THIS_MODULE,
517 .release = unix_release,
518 .bind = unix_bind,
519 .connect = unix_dgram_connect,
520 .socketpair = unix_socketpair,
521 .accept = sock_no_accept,
522 .getname = unix_getname,
523 .poll = datagram_poll,
524 .ioctl = unix_ioctl,
525 .listen = sock_no_listen,
526 .shutdown = unix_shutdown,
527 .setsockopt = sock_no_setsockopt,
528 .getsockopt = sock_no_getsockopt,
529 .sendmsg = unix_dgram_sendmsg,
530 .recvmsg = unix_dgram_recvmsg,
531 .mmap = sock_no_mmap,
532 .sendpage = sock_no_sendpage,
533 };
535 static const struct proto_ops unix_seqpacket_ops = {
536 .family = PF_UNIX,
537 .owner = THIS_MODULE,
538 .release = unix_release,
539 .bind = unix_bind,
540 .connect = unix_stream_connect,
541 .socketpair = unix_socketpair,
542 .accept = unix_accept,
543 .getname = unix_getname,
544 .poll = datagram_poll,
545 .ioctl = unix_ioctl,
546 .listen = unix_listen,
547 .shutdown = unix_shutdown,
548 .setsockopt = sock_no_setsockopt,
549 .getsockopt = sock_no_getsockopt,
550 .sendmsg = unix_seqpacket_sendmsg,
551 .recvmsg = unix_dgram_recvmsg,
552 .mmap = sock_no_mmap,
553 .sendpage = sock_no_sendpage,
554 };
556 static struct proto unix_proto = {
557 .name = "UNIX",
558 .owner = THIS_MODULE,
559 .obj_size = sizeof(struct unix_sock),
560 };
562 /*
563 * AF_UNIX sockets do not interact with hardware, hence they
564 * dont trigger interrupts - so it's safe for them to have
565 * bh-unsafe locking for their sk_receive_queue.lock. Split off
566 * this special lock-class by reinitializing the spinlock key:
567 */
568 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
570 static struct sock * unix_create1(struct socket *sock)
571 {
572 struct sock *sk = NULL;
573 struct unix_sock *u;
575 if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
576 goto out;
578 sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
579 if (!sk)
580 goto out;
582 atomic_inc(&unix_nr_socks);
584 sock_init_data(sock,sk);
585 lockdep_set_class(&sk->sk_receive_queue.lock,
586 &af_unix_sk_receive_queue_lock_key);
588 sk->sk_write_space = unix_write_space;
589 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
590 sk->sk_destruct = unix_sock_destructor;
591 u = unix_sk(sk);
592 u->dentry = NULL;
593 u->mnt = NULL;
594 spin_lock_init(&u->lock);
595 atomic_set(&u->inflight, sock ? 0 : -1);
596 mutex_init(&u->readlock); /* single task reading lock */
597 init_waitqueue_head(&u->peer_wait);
598 unix_insert_socket(unix_sockets_unbound, sk);
599 out:
600 return sk;
601 }
603 static int unix_create(struct socket *sock, int protocol)
604 {
605 if (protocol && protocol != PF_UNIX)
606 return -EPROTONOSUPPORT;
608 sock->state = SS_UNCONNECTED;
610 switch (sock->type) {
611 case SOCK_STREAM:
612 sock->ops = &unix_stream_ops;
613 break;
614 /*
615 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
616 * nothing uses it.
617 */
618 case SOCK_RAW:
619 sock->type=SOCK_DGRAM;
620 case SOCK_DGRAM:
621 sock->ops = &unix_dgram_ops;
622 break;
623 case SOCK_SEQPACKET:
624 sock->ops = &unix_seqpacket_ops;
625 break;
626 default:
627 return -ESOCKTNOSUPPORT;
628 }
630 return unix_create1(sock) ? 0 : -ENOMEM;
631 }
633 static int unix_release(struct socket *sock)
634 {
635 struct sock *sk = sock->sk;
637 if (!sk)
638 return 0;
640 sock->sk = NULL;
642 return unix_release_sock (sk, 0);
643 }
645 static int unix_autobind(struct socket *sock)
646 {
647 struct sock *sk = sock->sk;
648 struct unix_sock *u = unix_sk(sk);
649 static u32 ordernum = 1;
650 struct unix_address * addr;
651 int err;
653 mutex_lock(&u->readlock);
655 err = 0;
656 if (u->addr)
657 goto out;
659 err = -ENOMEM;
660 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
661 if (!addr)
662 goto out;
664 addr->name->sun_family = AF_UNIX;
665 atomic_set(&addr->refcnt, 1);
667 retry:
668 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
669 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
671 spin_lock(&unix_table_lock);
672 ordernum = (ordernum+1)&0xFFFFF;
674 if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
675 addr->hash)) {
676 spin_unlock(&unix_table_lock);
677 /* Sanity yield. It is unusual case, but yet... */
678 if (!(ordernum&0xFF))
679 yield();
680 goto retry;
681 }
682 addr->hash ^= sk->sk_type;
684 __unix_remove_socket(sk);
685 u->addr = addr;
686 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
687 spin_unlock(&unix_table_lock);
688 err = 0;
690 out: mutex_unlock(&u->readlock);
691 return err;
692 }
/*
 * Resolve a destination address to a socket of the requested @type.
 *
 * Filesystem names are looked up through the VFS, must be writable by
 * the caller and must refer to a socket inode; abstract names are
 * looked up in the hash table.  On success the socket is returned with
 * a reference held.  On failure NULL is returned and *error is set.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct nameidata nd;
	int err = 0;

	if (!sunname->sun_path[0]) {
		/* Abstract name. */
		struct dentry *dentry;

		err = -ECONNREFUSED;
		u = unix_find_socket_byname(sunname, len, type, hash);
		if (u == NULL)
			goto fail;

		dentry = unix_sk(u)->dentry;
		if (dentry)
			touch_atime(unix_sk(u)->mnt, dentry);
		return u;
	}

	/* Filesystem name. */
	err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
	if (err)
		goto fail;

	err = vfs_permission(&nd, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
		goto put_fail;

	u = unix_find_socket_byinode(nd.dentry->d_inode);
	if (u == NULL)
		goto put_fail;

	if (u->sk_type == type)
		touch_atime(nd.mnt, nd.dentry);

	path_release(&nd);

	/* Right inode, wrong kind of socket. */
	err = -EPROTOTYPE;
	if (u->sk_type != type) {
		sock_put(u);
		goto fail;
	}
	return u;

put_fail:
	path_release(&nd);
fail:
	*error = err;
	return NULL;
}
747 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
748 {
749 struct sock *sk = sock->sk;
750 struct unix_sock *u = unix_sk(sk);
751 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
752 struct dentry * dentry = NULL;
753 struct nameidata nd;
754 int err;
755 unsigned hash;
756 struct unix_address *addr;
757 struct hlist_head *list;
759 err = -EINVAL;
760 if (sunaddr->sun_family != AF_UNIX)
761 goto out;
763 if (addr_len==sizeof(short)) {
764 err = unix_autobind(sock);
765 goto out;
766 }
768 err = unix_mkname(sunaddr, addr_len, &hash);
769 if (err < 0)
770 goto out;
771 addr_len = err;
773 mutex_lock(&u->readlock);
775 err = -EINVAL;
776 if (u->addr)
777 goto out_up;
779 err = -ENOMEM;
780 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
781 if (!addr)
782 goto out_up;
784 memcpy(addr->name, sunaddr, addr_len);
785 addr->len = addr_len;
786 addr->hash = hash ^ sk->sk_type;
787 atomic_set(&addr->refcnt, 1);
789 if (sunaddr->sun_path[0]) {
790 unsigned int mode;
791 err = 0;
792 /*
793 * Get the parent directory, calculate the hash for last
794 * component.
795 */
796 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
797 if (err)
798 goto out_mknod_parent;
800 dentry = lookup_create(&nd, 0);
801 err = PTR_ERR(dentry);
802 if (IS_ERR(dentry))
803 goto out_mknod_unlock;
805 /*
806 * All right, let's create it.
807 */
808 mode = S_IFSOCK |
809 (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
810 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
811 if (err)
812 goto out_mknod_dput;
813 mutex_unlock(&nd.dentry->d_inode->i_mutex);
814 dput(nd.dentry);
815 nd.dentry = dentry;
817 addr->hash = UNIX_HASH_SIZE;
818 }
820 spin_lock(&unix_table_lock);
822 if (!sunaddr->sun_path[0]) {
823 err = -EADDRINUSE;
824 if (__unix_find_socket_byname(sunaddr, addr_len,
825 sk->sk_type, hash)) {
826 unix_release_addr(addr);
827 goto out_unlock;
828 }
830 list = &unix_socket_table[addr->hash];
831 } else {
832 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
833 u->dentry = nd.dentry;
834 u->mnt = nd.mnt;
835 }
837 err = 0;
838 __unix_remove_socket(sk);
839 u->addr = addr;
840 __unix_insert_socket(list, sk);
842 out_unlock:
843 spin_unlock(&unix_table_lock);
844 out_up:
845 mutex_unlock(&u->readlock);
846 out:
847 return err;
849 out_mknod_dput:
850 dput(dentry);
851 out_mknod_unlock:
852 mutex_unlock(&nd.dentry->d_inode->i_mutex);
853 path_release(&nd);
854 out_mknod_parent:
855 if (err==-EEXIST)
856 err=-EADDRINUSE;
857 unix_release_addr(addr);
858 goto out_up;
859 }
861 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
862 int alen, int flags)
863 {
864 struct sock *sk = sock->sk;
865 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
866 struct sock *other;
867 unsigned hash;
868 int err;
870 if (addr->sa_family != AF_UNSPEC) {
871 err = unix_mkname(sunaddr, alen, &hash);
872 if (err < 0)
873 goto out;
874 alen = err;
876 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
877 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
878 goto out;
880 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
881 if (!other)
882 goto out;
884 unix_state_wlock(sk);
886 err = -EPERM;
887 if (!unix_may_send(sk, other))
888 goto out_unlock;
890 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
891 if (err)
892 goto out_unlock;
894 } else {
895 /*
896 * 1003.1g breaking connected state with AF_UNSPEC
897 */
898 other = NULL;
899 unix_state_wlock(sk);
900 }
902 /*
903 * If it was connected, reconnect.
904 */
905 if (unix_peer(sk)) {
906 struct sock *old_peer = unix_peer(sk);
907 unix_peer(sk)=other;
908 unix_state_wunlock(sk);
910 if (other != old_peer)
911 unix_dgram_disconnected(sk, old_peer);
912 sock_put(old_peer);
913 } else {
914 unix_peer(sk)=other;
915 unix_state_wunlock(sk);
916 }
917 return 0;
919 out_unlock:
920 unix_state_wunlock(sk);
921 sock_put(other);
922 out:
923 return err;
924 }
926 static long unix_wait_for_peer(struct sock *other, long timeo)
927 {
928 struct unix_sock *u = unix_sk(other);
929 int sched;
930 DEFINE_WAIT(wait);
932 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
934 sched = !sock_flag(other, SOCK_DEAD) &&
935 !(other->sk_shutdown & RCV_SHUTDOWN) &&
936 (skb_queue_len(&other->sk_receive_queue) >
937 other->sk_max_ack_backlog);
939 unix_state_runlock(other);
941 if (sched)
942 timeo = schedule_timeout(timeo);
944 finish_wait(&u->peer_wait, &wait);
945 return timeo;
946 }
/*
 * connect(2) for stream and seqpacket sockets.
 *
 * A new sock (the future accept()ed end) and a one-byte skb are
 * allocated up front; the skb, owned by the new sock, is then queued
 * on the listener's receive queue where unix_accept() picks it up.
 *
 * Returns 0 on success, -ECONNREFUSED when the target is not
 * listening, -EAGAIN when the backlog is full and the caller must not
 * block, -EISCONN / -EINVAL depending on our own state, -ENOMEM, or an
 * error from address parsing, autobind, lookup, the security hook or
 * an interrupted wait.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags)
		&& !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops the read lock on other. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_wlock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_wunlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	newsk->sk_peercred.pid	= current->tgid;
	newsk->sk_peercred.uid	= current->euid;
	newsk->sk_peercred.gid	= current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep		= &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry	= dget(otheru->dentry);
		newu->mnt	= mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_wunlock(sk);

	/* take ten and and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	/* Undo artificially decreased inflight after embrion
	 * is installed to listening socket. */
	atomic_inc(&newu->inflight);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_runlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1127 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1129 struct sock *ska=socka->sk, *skb = sockb->sk;
1131 /* Join our sockets back to back */
1132 sock_hold(ska);
1133 sock_hold(skb);
1134 unix_peer(ska)=skb;
1135 unix_peer(skb)=ska;
1136 ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1137 ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1138 ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1140 if (ska->sk_type != SOCK_DGRAM) {
1141 ska->sk_state = TCP_ESTABLISHED;
1142 skb->sk_state = TCP_ESTABLISHED;
1143 socka->state = SS_CONNECTED;
1144 sockb->state = SS_CONNECTED;
1146 return 0;
1149 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1151 struct sock *sk = sock->sk;
1152 struct sock *tsk;
1153 struct sk_buff *skb;
1154 int err;
1156 err = -EOPNOTSUPP;
1157 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1158 goto out;
1160 err = -EINVAL;
1161 if (sk->sk_state != TCP_LISTEN)
1162 goto out;
1164 /* If socket state is TCP_LISTEN it cannot change (for now...),
1165 * so that no locks are necessary.
1166 */
1168 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1169 if (!skb) {
1170 /* This means receive shutdown. */
1171 if (err == 0)
1172 err = -EINVAL;
1173 goto out;
1176 tsk = skb->sk;
1177 skb_free_datagram(sk, skb);
1178 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1180 /* attach accepted sock to socket */
1181 unix_state_wlock(tsk);
1182 newsock->state = SS_CONNECTED;
1183 sock_graft(tsk, newsock);
1184 unix_state_wunlock(tsk);
1185 return 0;
1187 out:
1188 return err;
1192 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1194 struct sock *sk = sock->sk;
1195 struct unix_sock *u;
1196 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1197 int err = 0;
1199 if (peer) {
1200 sk = unix_peer_get(sk);
1202 err = -ENOTCONN;
1203 if (!sk)
1204 goto out;
1205 err = 0;
1206 } else {
1207 sock_hold(sk);
1210 u = unix_sk(sk);
1211 unix_state_rlock(sk);
1212 if (!u->addr) {
1213 sunaddr->sun_family = AF_UNIX;
1214 sunaddr->sun_path[0] = 0;
1215 *uaddr_len = sizeof(short);
1216 } else {
1217 struct unix_address *addr = u->addr;
1219 *uaddr_len = addr->len;
1220 memcpy(sunaddr, addr->name, *uaddr_len);
1222 unix_state_runlock(sk);
1223 sock_put(sk);
1224 out:
1225 return err;
1228 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1230 int i;
1232 scm->fp = UNIXCB(skb).fp;
1233 skb->destructor = sock_wfree;
1234 UNIXCB(skb).fp = NULL;
1236 for (i=scm->fp->count-1; i>=0; i--)
1237 unix_notinflight(scm->fp->fp[i]);
1240 static void unix_destruct_fds(struct sk_buff *skb)
1242 struct scm_cookie scm;
1243 memset(&scm, 0, sizeof(scm));
1244 unix_detach_fds(&scm, skb);
1246 /* Alas, it calls VFS */
1247 /* So fscking what? fput() had been SMP-safe since the last Summer */
1248 scm_destroy(&scm);
1249 sock_wfree(skb);
1252 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1254 int i;
1255 for (i=scm->fp->count-1; i>=0; i--)
1256 unix_inflight(scm->fp->fp[i]);
1257 UNIXCB(skb).fp = scm->fp;
1258 skb->destructor = unix_destruct_fds;
1259 scm->fp = NULL;
1262 /*
1263 * Send AF_UNIX data.
1264 */
/*
 * Send one datagram.
 *
 * The destination is msg->msg_name when msg->msg_namelen is non-zero,
 * otherwise the connected peer (-ENOTCONN if there is none).  MSG_OOB is
 * rejected with -EOPNOTSUPP and a payload larger than sk_sndbuf - 32 with
 * -EMSGSIZE.  Returns len on success or a negative errno; once scm_send()
 * has succeeded the scm cookie is destroyed on every exit path.
 */
1266 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1267 struct msghdr *msg, size_t len)
1269 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1270 struct sock *sk = sock->sk;
1271 struct unix_sock *u = unix_sk(sk);
1272 struct sockaddr_un *sunaddr=msg->msg_name;
1273 struct sock *other = NULL;
1274 int namelen = 0; /* fake GCC */
1275 int err;
1276 unsigned hash;
1277 struct sk_buff *skb;
1278 long timeo;
1279 struct scm_cookie tmp_scm;
/* Fall back to an on-stack cookie when the iocb does not carry one. */
1281 if (NULL == siocb->scm)
1282 siocb->scm = &tmp_scm;
1283 err = scm_send(sock, msg, siocb->scm);
1284 if (err < 0)
1285 return err;
1287 err = -EOPNOTSUPP;
1288 if (msg->msg_flags&MSG_OOB)
1289 goto out;
/* Explicit destination: validate it now, look it up later at restart.
 * No destination: use the connected peer. */
1291 if (msg->msg_namelen) {
1292 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1293 if (err < 0)
1294 goto out;
1295 namelen = err;
1296 } else {
1297 sunaddr = NULL;
1298 err = -ENOTCONN;
1299 other = unix_peer_get(sk);
1300 if (!other)
1301 goto out;
/* With SOCK_PASSCRED set and no address bound yet, autobind first. */
1304 if (test_bit(SOCK_PASSCRED, &sock->flags)
1305 && !u->addr && (err = unix_autobind(sock)) != 0)
1306 goto out;
1308 err = -EMSGSIZE;
1309 if (len > sk->sk_sndbuf - 32)
1310 goto out;
1312 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1313 if (skb==NULL)
1314 goto out;
/* Stamp sender credentials, attach any passed files, copy the payload. */
1316 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1317 if (siocb->scm->fp)
1318 unix_attach_fds(siocb->scm, skb);
1319 unix_get_secdata(siocb->scm, skb);
1321 skb->h.raw = skb->data;
1322 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1323 if (err)
1324 goto out_free;
1326 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
/* Re-entered whenever the receiver has to be looked up again or the
 * sender had to wait for room in the receiver's queue. */
1328 restart:
1329 if (!other) {
1330 err = -ECONNRESET;
1331 if (sunaddr == NULL)
1332 goto out_free;
1334 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1335 hash, &err);
1336 if (other==NULL)
1337 goto out_free;
1340 unix_state_rlock(other);
1341 err = -EPERM;
1342 if (!unix_may_send(sk, other))
1343 goto out_unlock;
/* Receiver is dead: drop our reference.  If it was still our connected
 * peer, disconnect and fail with -ECONNREFUSED; otherwise retry. */
1345 if (sock_flag(other, SOCK_DEAD)) {
1346 /*
1347 * Check with 1003.1g - what should
1348 * datagram error
1349 */
1350 unix_state_runlock(other);
1351 sock_put(other);
1353 err = 0;
1354 unix_state_wlock(sk);
1355 if (unix_peer(sk) == other) {
1356 unix_peer(sk)=NULL;
1357 unix_state_wunlock(sk);
1359 unix_dgram_disconnected(sk, other);
1360 sock_put(other);
1361 err = -ECONNREFUSED;
1362 } else {
1363 unix_state_wunlock(sk);
/* other is cleared so the exit path does not sock_put() it again. */
1366 other = NULL;
1367 if (err)
1368 goto out_free;
1369 goto restart;
1372 err = -EPIPE;
1373 if (other->sk_shutdown & RCV_SHUTDOWN)
1374 goto out_unlock;
1376 if (sk->sk_type != SOCK_SEQPACKET) {
1377 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1378 if (err)
1379 goto out_unlock;
/* Receiver is not connected back to us and its queue is longer than
 * sk_max_ack_backlog: fail with -EAGAIN when no timeout is left,
 * otherwise wait for the peer and retry. */
1382 if (unix_peer(other) != sk &&
1383 (skb_queue_len(&other->sk_receive_queue) >
1384 other->sk_max_ack_backlog)) {
1385 if (!timeo) {
1386 err = -EAGAIN;
1387 goto out_unlock;
/* NOTE(review): this path jumps to out_free/restart without unlocking
 * here, so unix_wait_for_peer() presumably drops other's state lock -
 * confirm against its definition. */
1390 timeo = unix_wait_for_peer(other, timeo);
1392 err = sock_intr_errno(timeo);
1393 if (signal_pending(current))
1394 goto out_free;
1396 goto restart;
/* Deliver: queue on the receiver, notify it, drop our reference. */
1399 skb_queue_tail(&other->sk_receive_queue, skb);
1400 unix_state_runlock(other);
1401 other->sk_data_ready(other, len);
1402 sock_put(other);
1403 scm_destroy(siocb->scm);
1404 return len;
1406 out_unlock:
1407 unix_state_runlock(other);
1408 out_free:
1409 kfree_skb(skb);
1410 out:
1411 if (other)
1412 sock_put(other);
1413 scm_destroy(siocb->scm);
1414 return err;
/*
 * Send data on a stream socket.
 *
 * An explicit destination is never accepted: -EISCONN when the socket is
 * established, -EOPNOTSUPP otherwise.  MSG_OOB yields -EOPNOTSUPP and a
 * socket without a peer -ENOTCONN.  The payload is cut into skbs that are
 * queued on the peer one at a time.  Returns the number of bytes queued if
 * that is non-zero, otherwise the error.  On -EPIPE, SIGPIPE is raised
 * only when nothing was sent and MSG_NOSIGNAL is not set.  The scm cookie
 * is destroyed and siocb->scm cleared on every exit after scm_send().
 */
1418 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1419 struct msghdr *msg, size_t len)
1421 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1422 struct sock *sk = sock->sk;
1423 struct sock *other = NULL;
1424 struct sockaddr_un *sunaddr=msg->msg_name;
1425 int err,size;
1426 struct sk_buff *skb;
1427 int sent=0;
1428 struct scm_cookie tmp_scm;
/* Fall back to an on-stack cookie when the iocb does not carry one. */
1430 if (NULL == siocb->scm)
1431 siocb->scm = &tmp_scm;
1432 err = scm_send(sock, msg, siocb->scm);
1433 if (err < 0)
1434 return err;
1436 err = -EOPNOTSUPP;
1437 if (msg->msg_flags&MSG_OOB)
1438 goto out_err;
1440 if (msg->msg_namelen) {
1441 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1442 goto out_err;
1443 } else {
1444 sunaddr = NULL;
1445 err = -ENOTCONN;
/* unix_peer() is read directly here; no sock_hold()/sock_put() pair on
 * other appears anywhere in this function. */
1446 other = unix_peer(sk);
1447 if (!other)
1448 goto out_err;
1451 if (sk->sk_shutdown & SEND_SHUTDOWN)
1452 goto pipe_err;
1454 while(sent < len)
1456 /*
1457 * Optimisation for the fact that under 0.01% of X
1458 * messages typically need breaking up.
1459 */
1461 size = len-sent;
1463 /* Keep two messages in the pipe so it schedules better */
1464 if (size > ((sk->sk_sndbuf >> 1) - 64))
1465 size = (sk->sk_sndbuf >> 1) - 64;
1467 if (size > SKB_MAX_ALLOC)
1468 size = SKB_MAX_ALLOC;
1470 /*
1471 * Grab a buffer
1472 */
1474 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1476 if (skb==NULL)
1477 goto out_err;
1479 /*
1480 * If you pass two values to the sock_alloc_send_skb
1481 * it tries to grab the large buffer with GFP_NOFS
1482 * (which can fail easily), and if it fails grab the
1483 * fallback size buffer which is under a page and will
1484 * succeed. [Alan]
1485 */
1486 size = min_t(int, size, skb_tailroom(skb));
/* unix_attach_fds() clears siocb->scm->fp, so the passed files can only
 * be attached to the first skb of the message. */
1488 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1489 if (siocb->scm->fp)
1490 unix_attach_fds(siocb->scm, skb);
1492 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1493 kfree_skb(skb);
1494 goto out_err;
/* The peer may have died or shut down reception while data was copied. */
1497 unix_state_rlock(other);
1499 if (sock_flag(other, SOCK_DEAD) ||
1500 (other->sk_shutdown & RCV_SHUTDOWN))
1501 goto pipe_err_free;
1503 skb_queue_tail(&other->sk_receive_queue, skb);
1504 unix_state_runlock(other);
1505 other->sk_data_ready(other, size);
1506 sent+=size;
1509 scm_destroy(siocb->scm);
1510 siocb->scm = NULL;
1512 return sent;
1514 pipe_err_free:
1515 unix_state_runlock(other);
1516 kfree_skb(skb);
1517 pipe_err:
1518 if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1519 send_sig(SIGPIPE,current,0);
1520 err = -EPIPE;
1521 out_err:
1522 scm_destroy(siocb->scm);
1523 siocb->scm = NULL;
/* A partial write reports the byte count, not the error. */
1524 return sent ? : err;
1527 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1528 struct msghdr *msg, size_t len)
1530 int err;
1531 struct sock *sk = sock->sk;
1533 err = sock_error(sk);
1534 if (err)
1535 return err;
1537 if (sk->sk_state != TCP_ESTABLISHED)
1538 return -ENOTCONN;
1540 if (msg->msg_namelen)
1541 msg->msg_namelen = 0;
1543 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1546 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1548 struct unix_sock *u = unix_sk(sk);
1550 msg->msg_namelen = 0;
1551 if (u->addr) {
1552 msg->msg_namelen = u->addr->len;
1553 memcpy(msg->msg_name, u->addr->name, u->addr->len);
/*
 * Receive one datagram.
 *
 * MSG_OOB is rejected with -EOPNOTSUPP.  u->readlock is held from before
 * the dequeue until the skb has been released.  If the caller supplied
 * msg_name, the sender's address is copied into it.  Data is copied up to
 * the smaller of size and skb->len; MSG_TRUNC is set in msg->msg_flags
 * when the buffer was too small.  Returns the number of bytes copied or a
 * negative errno.
 */
1557 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1558 struct msghdr *msg, size_t size,
1559 int flags)
1561 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1562 struct scm_cookie tmp_scm;
1563 struct sock *sk = sock->sk;
1564 struct unix_sock *u = unix_sk(sk);
1565 int noblock = flags & MSG_DONTWAIT;
1566 struct sk_buff *skb;
1567 int err;
1569 err = -EOPNOTSUPP;
1570 if (flags&MSG_OOB)
1571 goto out;
1573 msg->msg_namelen = 0;
1575 mutex_lock(&u->readlock);
1577 skb = skb_recv_datagram(sk, flags, noblock, &err);
1578 if (!skb)
1579 goto out_unlock;
/* Wake anyone sleeping on this socket's peer_wait queue. */
1581 wake_up_interruptible(&u->peer_wait);
1583 if (msg->msg_name)
1584 unix_copy_addr(msg, skb->sk);
1586 if (size > skb->len)
1587 size = skb->len;
1588 else if (size < skb->len)
1589 msg->msg_flags |= MSG_TRUNC;
/* A copy failure jumps to out_free and so skips scm_recv() below. */
1591 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1592 if (err)
1593 goto out_free;
/* Use a zeroed on-stack cookie when the iocb does not carry one. */
1595 if (!siocb->scm) {
1596 siocb->scm = &tmp_scm;
1597 memset(&tmp_scm, 0, sizeof(tmp_scm));
1599 siocb->scm->creds = *UNIXCREDS(skb);
1600 unix_set_secdata(siocb->scm, skb);
/* A real read takes the passed files off the skb; MSG_PEEK gets a
 * duplicate of the list and leaves the skb's own copy in place. */
1602 if (!(flags & MSG_PEEK))
1604 if (UNIXCB(skb).fp)
1605 unix_detach_fds(siocb->scm, skb);
1607 else
1609 /* It is questionable: on PEEK we could:
1610 - do not return fds - good, but too simple 8)
1611 - return fds, and do not return them on read (old strategy,
1612 apparently wrong)
1613 - clone fds (I chose it for now, it is the most universal
1614 solution)
1616 POSIX 1003.1g does not actually define this clearly
1617 at all. POSIX 1003.1g doesn't define a lot of things
1618 clearly however!
1620 */
1621 if (UNIXCB(skb).fp)
1622 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1624 err = size;
1626 scm_recv(sock, msg, siocb->scm, flags);
1628 out_free:
1629 skb_free_datagram(sk,skb);
1630 out_unlock:
1631 mutex_unlock(&u->readlock);
1632 out:
1633 return err;
1636 /*
1637 * Sleep until data has arrived, but check for races.
1638 */
1640 static long unix_stream_data_wait(struct sock * sk, long timeo)
1642 DEFINE_WAIT(wait);
1644 unix_state_rlock(sk);
1646 for (;;) {
1647 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1649 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1650 sk->sk_err ||
1651 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1652 signal_pending(current) ||
1653 !timeo)
1654 break;
1656 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1657 unix_state_runlock(sk);
1658 timeo = schedule_timeout(timeo);
1659 unix_state_rlock(sk);
1660 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1663 finish_wait(sk->sk_sleep, &wait);
1664 unix_state_runlock(sk);
1665 return timeo;
/*
 * Receive data from a stream socket.
 *
 * The socket must be in TCP_ESTABLISHED (-EINVAL otherwise) and MSG_OOB is
 * rejected with -EOPNOTSUPP.  skbs are taken off the receive queue under
 * u->readlock and copied out until the request is satisfied or one of the
 * break conditions in the loop is hit.  Returns the number of bytes copied
 * if that is non-zero, otherwise err (a -EFAULT on the very first copy is
 * returned through copied).
 */
1670 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1671 struct msghdr *msg, size_t size,
1672 int flags)
1674 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1675 struct scm_cookie tmp_scm;
1676 struct sock *sk = sock->sk;
1677 struct unix_sock *u = unix_sk(sk);
1678 struct sockaddr_un *sunaddr=msg->msg_name;
1679 int copied = 0;
1680 int check_creds = 0;
1681 int target;
1682 int err = 0;
1683 long timeo;
1685 err = -EINVAL;
1686 if (sk->sk_state != TCP_ESTABLISHED)
1687 goto out;
1689 err = -EOPNOTSUPP;
1690 if (flags&MSG_OOB)
1691 goto out;
/* target: minimum byte count that lets an empty queue end the loop. */
1693 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1694 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1696 msg->msg_namelen = 0;
1698 /* Lock the socket to prevent queue disordering
1699 * while sleeps in memcpy_tomsg
1700 */
/* Use a zeroed on-stack cookie when the iocb does not carry one. */
1702 if (!siocb->scm) {
1703 siocb->scm = &tmp_scm;
1704 memset(&tmp_scm, 0, sizeof(tmp_scm));
1707 mutex_lock(&u->readlock);
1709 do
1711 int chunk;
1712 struct sk_buff *skb;
1714 skb = skb_dequeue(&sk->sk_receive_queue);
/* Queue empty: stop if enough was copied, on error, on receive shutdown
 * or when no timeout is left; otherwise drop readlock and wait. */
1715 if (skb==NULL)
1717 if (copied >= target)
1718 break;
1720 /*
1721 * POSIX 1003.1g mandates this order.
1722 */
1724 if ((err = sock_error(sk)) != 0)
1725 break;
1726 if (sk->sk_shutdown & RCV_SHUTDOWN)
1727 break;
1728 err = -EAGAIN;
1729 if (!timeo)
1730 break;
1731 mutex_unlock(&u->readlock);
1733 timeo = unix_stream_data_wait(sk, timeo);
/* Interrupted: readlock is not held here, and jumping to out also skips
 * the scm_recv() call after the loop. */
1735 if (signal_pending(current)) {
1736 err = sock_intr_errno(timeo);
1737 goto out;
1739 mutex_lock(&u->readlock);
1740 continue;
/* The first skb fixes the credentials for this call; a later skb with
 * different credentials is put back and ends the read. */
1743 if (check_creds) {
1744 /* Never glue messages from different writers */
1745 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1746 skb_queue_head(&sk->sk_receive_queue, skb);
1747 break;
1749 } else {
1750 /* Copy credentials */
1751 siocb->scm->creds = *UNIXCREDS(skb);
1752 check_creds = 1;
1755 /* Copy address just once */
1756 if (sunaddr)
1758 unix_copy_addr(msg, skb->sk);
1759 sunaddr = NULL;
/* A failed copy requeues the skb; -EFAULT is reported only if nothing
 * had been copied before. */
1762 chunk = min_t(unsigned int, skb->len, size);
1763 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1764 skb_queue_head(&sk->sk_receive_queue, skb);
1765 if (copied == 0)
1766 copied = -EFAULT;
1767 break;
1769 copied += chunk;
1770 size -= chunk;
1772 /* Mark read part of skb as used */
1773 if (!(flags & MSG_PEEK))
1775 skb_pull(skb, chunk);
1777 if (UNIXCB(skb).fp)
1778 unix_detach_fds(siocb->scm, skb);
1780 /* put the skb back if we didn't use it up.. */
1781 if (skb->len)
1783 skb_queue_head(&sk->sk_receive_queue, skb);
1784 break;
1787 kfree_skb(skb);
/* Stop once passed files have been collected from an skb. */
1789 if (siocb->scm->fp)
1790 break;
1792 else
1794 /* It is questionable, see note in unix_dgram_recvmsg.
1795 */
1796 if (UNIXCB(skb).fp)
1797 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1799 /* put message back and return */
1800 skb_queue_head(&sk->sk_receive_queue, skb);
1801 break;
1803 } while (size);
1805 mutex_unlock(&u->readlock);
1806 scm_recv(sock, msg, siocb->scm, flags);
1807 out:
1808 return copied ? : err;
1811 static int unix_shutdown(struct socket *sock, int mode)
1813 struct sock *sk = sock->sk;
1814 struct sock *other;
1816 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1818 if (mode) {
1819 unix_state_wlock(sk);
1820 sk->sk_shutdown |= mode;
1821 other=unix_peer(sk);
1822 if (other)
1823 sock_hold(other);
1824 unix_state_wunlock(sk);
1825 sk->sk_state_change(sk);
1827 if (other &&
1828 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1830 int peer_mode = 0;
1832 if (mode&RCV_SHUTDOWN)
1833 peer_mode |= SEND_SHUTDOWN;
1834 if (mode&SEND_SHUTDOWN)
1835 peer_mode |= RCV_SHUTDOWN;
1836 unix_state_wlock(other);
1837 other->sk_shutdown |= peer_mode;
1838 unix_state_wunlock(other);
1839 other->sk_state_change(other);
1840 read_lock(&other->sk_callback_lock);
1841 if (peer_mode == SHUTDOWN_MASK)
1842 sk_wake_async(other,1,POLL_HUP);
1843 else if (peer_mode & RCV_SHUTDOWN)
1844 sk_wake_async(other,1,POLL_IN);
1845 read_unlock(&other->sk_callback_lock);
1847 if (other)
1848 sock_put(other);
1850 return 0;
1853 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1855 struct sock *sk = sock->sk;
1856 long amount=0;
1857 int err;
1859 switch(cmd)
1861 case SIOCOUTQ:
1862 amount = atomic_read(&sk->sk_wmem_alloc);
1863 err = put_user(amount, (int __user *)arg);
1864 break;
1865 case SIOCINQ:
1867 struct sk_buff *skb;
1869 if (sk->sk_state == TCP_LISTEN) {
1870 err = -EINVAL;
1871 break;
1874 spin_lock(&sk->sk_receive_queue.lock);
1875 if (sk->sk_type == SOCK_STREAM ||
1876 sk->sk_type == SOCK_SEQPACKET) {
1877 skb_queue_walk(&sk->sk_receive_queue, skb)
1878 amount += skb->len;
1879 } else {
1880 skb = skb_peek(&sk->sk_receive_queue);
1881 if (skb)
1882 amount=skb->len;
1884 spin_unlock(&sk->sk_receive_queue.lock);
1885 err = put_user(amount, (int __user *)arg);
1886 break;
1889 default:
1890 err = -ENOIOCTLCMD;
1891 break;
1893 return err;
1896 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1898 struct sock *sk = sock->sk;
1899 unsigned int mask;
1901 poll_wait(file, sk->sk_sleep, wait);
1902 mask = 0;
1904 /* exceptional events? */
1905 if (sk->sk_err)
1906 mask |= POLLERR;
1907 if (sk->sk_shutdown == SHUTDOWN_MASK)
1908 mask |= POLLHUP;
1909 if (sk->sk_shutdown & RCV_SHUTDOWN)
1910 mask |= POLLRDHUP;
1912 /* readable? */
1913 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1914 (sk->sk_shutdown & RCV_SHUTDOWN))
1915 mask |= POLLIN | POLLRDNORM;
1917 /* Connection-based need to check for termination and startup */
1918 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1919 mask |= POLLHUP;
1921 /*
1922 * we set writable also when the other side has shut down the
1923 * connection. This prevents stuck sockets.
1924 */
1925 if (unix_writable(sk))
1926 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1928 return mask;
1932 #ifdef CONFIG_PROC_FS
1933 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1935 loff_t off = 0;
1936 struct sock *s;
1938 for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1939 if (off == pos)
1940 return s;
1941 ++off;
1943 return NULL;
1947 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1949 spin_lock(&unix_table_lock);
1950 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1953 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1955 ++*pos;
1957 if (v == (void *)1)
1958 return first_unix_socket(seq->private);
1959 return next_unix_socket(seq->private, v);
1962 static void unix_seq_stop(struct seq_file *seq, void *v)
1964 spin_unlock(&unix_table_lock);
/*
 * seq_file show callback: print one line of the listing.
 *
 * The token (void *)1 produces the column header; any other value is a
 * struct sock whose row is printed while its state read-lock is held.
 * Always returns 0.
 */
1967 static int unix_seq_show(struct seq_file *seq, void *v)
1970 if (v == (void *)1)
1971 seq_puts(seq, "Num RefCount Protocol Flags Type St "
1972 "Inode Path\n");
1973 else {
1974 struct sock *s = v;
1975 struct unix_sock *u = unix_sk(s);
1976 unix_state_rlock(s);
/* Columns: sock pointer, refcount, protocol (always 0), flags
 * (__SO_ACCEPTCON when listening), type, state, inode number. */
1978 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1979 s,
1980 atomic_read(&s->sk_refcnt),
1981 0,
1982 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1983 s->sk_type,
1984 s->sk_socket ?
1985 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1986 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1987 sock_i_ino(s));
/* Path column, only for bound sockets.  The path length is the address
 * length minus the sizeof(short) family field.  Non-abstract names drop
 * one more byte (presumably the trailing NUL - confirm); abstract names
 * get '@' printed in place of their first byte. */
1989 if (u->addr) {
1990 int i, len;
1991 seq_putc(seq, ' ');
1993 i = 0;
1994 len = u->addr->len - sizeof(short);
1995 if (!UNIX_ABSTRACT(s))
1996 len--;
1997 else {
1998 seq_putc(seq, '@');
1999 i++;
2001 for ( ; i < len; i++)
2002 seq_putc(seq, u->addr->name->sun_path[i]);
2004 unix_state_runlock(s);
2005 seq_putc(seq, '\n');
2008 return 0;
2011 static struct seq_operations unix_seq_ops = {
2012 .start = unix_seq_start,
2013 .next = unix_seq_next,
2014 .stop = unix_seq_stop,
2015 .show = unix_seq_show,
2016 };
2019 static int unix_seq_open(struct inode *inode, struct file *file)
2021 struct seq_file *seq;
2022 int rc = -ENOMEM;
2023 int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2025 if (!iter)
2026 goto out;
2028 rc = seq_open(file, &unix_seq_ops);
2029 if (rc)
2030 goto out_kfree;
2032 seq = file->private_data;
2033 seq->private = iter;
2034 *iter = 0;
2035 out:
2036 return rc;
2037 out_kfree:
2038 kfree(iter);
2039 goto out;
2042 static struct file_operations unix_seq_fops = {
2043 .owner = THIS_MODULE,
2044 .open = unix_seq_open,
2045 .read = seq_read,
2046 .llseek = seq_lseek,
2047 .release = seq_release_private,
2048 };
2050 #endif
2052 static struct net_proto_family unix_family_ops = {
2053 .family = PF_UNIX,
2054 .create = unix_create,
2055 .owner = THIS_MODULE,
2056 };
2058 static int __init af_unix_init(void)
2060 int rc = -1;
2061 struct sk_buff *dummy_skb;
2063 if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2064 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2065 goto out;
2068 rc = proto_register(&unix_proto, 1);
2069 if (rc != 0) {
2070 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2071 __FUNCTION__);
2072 goto out;
2075 sock_register(&unix_family_ops);
2076 #ifdef CONFIG_PROC_FS
2077 proc_net_fops_create("unix", 0, &unix_seq_fops);
2078 #endif
2079 unix_sysctl_register();
2080 out:
2081 return rc;
2084 static void __exit af_unix_exit(void)
2086 sock_unregister(PF_UNIX);
2087 unix_sysctl_unregister();
2088 proc_net_remove("unix");
2089 proto_unregister(&unix_proto);
/* Module entry and exit hooks, licence tag and the PF_UNIX protocol alias. */
2092 module_init(af_unix_init);
2093 module_exit(af_unix_exit);
2095 MODULE_LICENSE("GPL");
2096 MODULE_ALIAS_NETPROTO(PF_UNIX);