]> bbs.cooldavid.org Git - net-next-2.6.git/blame - fs/pipe.c
[PATCH] Introduce sys_splice() system call
[net-next-2.6.git] / fs / pipe.c
CommitLineData
1da177e4
LT
1/*
2 * linux/fs/pipe.c
3 *
4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds
5 */
6
7#include <linux/mm.h>
8#include <linux/file.h>
9#include <linux/poll.h>
10#include <linux/slab.h>
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/fs.h>
14#include <linux/mount.h>
15#include <linux/pipe_fs_i.h>
16#include <linux/uio.h>
17#include <linux/highmem.h>
5274f052 18#include <linux/pagemap.h>
1da177e4
LT
19
20#include <asm/uaccess.h>
21#include <asm/ioctls.h>
22
23/*
24 * We use a start+len construction, which provides full use of the
25 * allocated memory.
26 * -- Florian Coosmann (FGC)
27 *
28 * Reads with count = 0 should always return 0.
29 * -- Julian Bradfield 1999-06-07.
30 *
31 * FIFOs and Pipes now generate SIGIO for both readers and writers.
32 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33 *
34 * pipe_read & write cleanup
35 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36 */
37
38/* Drop the inode semaphore and wait for a pipe event, atomically */
39void pipe_wait(struct inode * inode)
40{
41 DEFINE_WAIT(wait);
42
d79fc0fc
IM
43 /*
44 * Pipes are system-local resources, so sleeping on them
45 * is considered a noninteractive wait:
46 */
47 prepare_to_wait(PIPE_WAIT(*inode), &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
1b1dcc1b 48 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
49 schedule();
50 finish_wait(PIPE_WAIT(*inode), &wait);
1b1dcc1b 51 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
52}
53
858119e1 54static int
1da177e4
LT
55pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
56{
57 unsigned long copy;
58
59 while (len > 0) {
60 while (!iov->iov_len)
61 iov++;
62 copy = min_t(unsigned long, len, iov->iov_len);
63
64 if (copy_from_user(to, iov->iov_base, copy))
65 return -EFAULT;
66 to += copy;
67 len -= copy;
68 iov->iov_base += copy;
69 iov->iov_len -= copy;
70 }
71 return 0;
72}
73
858119e1 74static int
1da177e4
LT
75pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
76{
77 unsigned long copy;
78
79 while (len > 0) {
80 while (!iov->iov_len)
81 iov++;
82 copy = min_t(unsigned long, len, iov->iov_len);
83
84 if (copy_to_user(iov->iov_base, from, copy))
85 return -EFAULT;
86 from += copy;
87 len -= copy;
88 iov->iov_base += copy;
89 iov->iov_len -= copy;
90 }
91 return 0;
92}
93
94static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
95{
96 struct page *page = buf->page;
97
5274f052
JA
98 /*
99 * If nobody else uses this page, and we don't already have a
100 * temporary page, let's keep track of it as a one-deep
101 * allocation cache
102 */
103 if (page_count(page) == 1 && !info->tmp_page) {
104 info->tmp_page = page;
1da177e4
LT
105 return;
106 }
5274f052
JA
107
108 /*
109 * Otherwise just release our reference to it
110 */
111 page_cache_release(page);
1da177e4
LT
112}
113
114static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
115{
116 return kmap(buf->page);
117}
118
119static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
120{
121 kunmap(buf->page);
122}
123
124static struct pipe_buf_operations anon_pipe_buf_ops = {
125 .can_merge = 1,
126 .map = anon_pipe_buf_map,
127 .unmap = anon_pipe_buf_unmap,
128 .release = anon_pipe_buf_release,
129};
130
131static ssize_t
132pipe_readv(struct file *filp, const struct iovec *_iov,
133 unsigned long nr_segs, loff_t *ppos)
134{
135 struct inode *inode = filp->f_dentry->d_inode;
136 struct pipe_inode_info *info;
137 int do_wakeup;
138 ssize_t ret;
139 struct iovec *iov = (struct iovec *)_iov;
140 size_t total_len;
141
142 total_len = iov_length(iov, nr_segs);
143 /* Null read succeeds. */
144 if (unlikely(total_len == 0))
145 return 0;
146
147 do_wakeup = 0;
148 ret = 0;
1b1dcc1b 149 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
150 info = inode->i_pipe;
151 for (;;) {
152 int bufs = info->nrbufs;
153 if (bufs) {
154 int curbuf = info->curbuf;
155 struct pipe_buffer *buf = info->bufs + curbuf;
156 struct pipe_buf_operations *ops = buf->ops;
157 void *addr;
158 size_t chars = buf->len;
159 int error;
160
161 if (chars > total_len)
162 chars = total_len;
163
164 addr = ops->map(filp, info, buf);
5274f052
JA
165 if (IS_ERR(addr)) {
166 if (!ret)
167 ret = PTR_ERR(addr);
168 break;
169 }
1da177e4
LT
170 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
171 ops->unmap(info, buf);
172 if (unlikely(error)) {
173 if (!ret) ret = -EFAULT;
174 break;
175 }
176 ret += chars;
177 buf->offset += chars;
178 buf->len -= chars;
179 if (!buf->len) {
180 buf->ops = NULL;
181 ops->release(info, buf);
182 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
183 info->curbuf = curbuf;
184 info->nrbufs = --bufs;
185 do_wakeup = 1;
186 }
187 total_len -= chars;
188 if (!total_len)
189 break; /* common path: read succeeded */
190 }
191 if (bufs) /* More to do? */
192 continue;
193 if (!PIPE_WRITERS(*inode))
194 break;
195 if (!PIPE_WAITING_WRITERS(*inode)) {
196 /* syscall merging: Usually we must not sleep
197 * if O_NONBLOCK is set, or if we got some data.
198 * But if a writer sleeps in kernel space, then
199 * we can wait for that data without violating POSIX.
200 */
201 if (ret)
202 break;
203 if (filp->f_flags & O_NONBLOCK) {
204 ret = -EAGAIN;
205 break;
206 }
207 }
208 if (signal_pending(current)) {
209 if (!ret) ret = -ERESTARTSYS;
210 break;
211 }
212 if (do_wakeup) {
213 wake_up_interruptible_sync(PIPE_WAIT(*inode));
214 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
215 }
216 pipe_wait(inode);
217 }
1b1dcc1b 218 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
219 /* Signal writers asynchronously that there is more room. */
220 if (do_wakeup) {
221 wake_up_interruptible(PIPE_WAIT(*inode));
222 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
223 }
224 if (ret > 0)
225 file_accessed(filp);
226 return ret;
227}
228
229static ssize_t
230pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
231{
232 struct iovec iov = { .iov_base = buf, .iov_len = count };
233 return pipe_readv(filp, &iov, 1, ppos);
234}
235
236static ssize_t
237pipe_writev(struct file *filp, const struct iovec *_iov,
238 unsigned long nr_segs, loff_t *ppos)
239{
240 struct inode *inode = filp->f_dentry->d_inode;
241 struct pipe_inode_info *info;
242 ssize_t ret;
243 int do_wakeup;
244 struct iovec *iov = (struct iovec *)_iov;
245 size_t total_len;
246 ssize_t chars;
247
248 total_len = iov_length(iov, nr_segs);
249 /* Null write succeeds. */
250 if (unlikely(total_len == 0))
251 return 0;
252
253 do_wakeup = 0;
254 ret = 0;
1b1dcc1b 255 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
256 info = inode->i_pipe;
257
258 if (!PIPE_READERS(*inode)) {
259 send_sig(SIGPIPE, current, 0);
260 ret = -EPIPE;
261 goto out;
262 }
263
264 /* We try to merge small writes */
265 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
266 if (info->nrbufs && chars != 0) {
267 int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
268 struct pipe_buffer *buf = info->bufs + lastbuf;
269 struct pipe_buf_operations *ops = buf->ops;
270 int offset = buf->offset + buf->len;
271 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
5274f052
JA
272 void *addr;
273 int error;
274
275 addr = ops->map(filp, info, buf);
276 if (IS_ERR(addr)) {
277 error = PTR_ERR(addr);
278 goto out;
279 }
280 error = pipe_iov_copy_from_user(offset + addr, iov,
281 chars);
1da177e4
LT
282 ops->unmap(info, buf);
283 ret = error;
284 do_wakeup = 1;
285 if (error)
286 goto out;
287 buf->len += chars;
288 total_len -= chars;
289 ret = chars;
290 if (!total_len)
291 goto out;
292 }
293 }
294
295 for (;;) {
296 int bufs;
297 if (!PIPE_READERS(*inode)) {
298 send_sig(SIGPIPE, current, 0);
299 if (!ret) ret = -EPIPE;
300 break;
301 }
302 bufs = info->nrbufs;
303 if (bufs < PIPE_BUFFERS) {
304 int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
305 struct pipe_buffer *buf = info->bufs + newbuf;
306 struct page *page = info->tmp_page;
307 int error;
308
309 if (!page) {
310 page = alloc_page(GFP_HIGHUSER);
311 if (unlikely(!page)) {
312 ret = ret ? : -ENOMEM;
313 break;
314 }
315 info->tmp_page = page;
316 }
317 /* Always wakeup, even if the copy fails. Otherwise
318 * we lock up (O_NONBLOCK-)readers that sleep due to
319 * syscall merging.
320 * FIXME! Is this really true?
321 */
322 do_wakeup = 1;
323 chars = PAGE_SIZE;
324 if (chars > total_len)
325 chars = total_len;
326
327 error = pipe_iov_copy_from_user(kmap(page), iov, chars);
328 kunmap(page);
329 if (unlikely(error)) {
330 if (!ret) ret = -EFAULT;
331 break;
332 }
333 ret += chars;
334
335 /* Insert it into the buffer array */
336 buf->page = page;
337 buf->ops = &anon_pipe_buf_ops;
338 buf->offset = 0;
339 buf->len = chars;
340 info->nrbufs = ++bufs;
341 info->tmp_page = NULL;
342
343 total_len -= chars;
344 if (!total_len)
345 break;
346 }
347 if (bufs < PIPE_BUFFERS)
348 continue;
349 if (filp->f_flags & O_NONBLOCK) {
350 if (!ret) ret = -EAGAIN;
351 break;
352 }
353 if (signal_pending(current)) {
354 if (!ret) ret = -ERESTARTSYS;
355 break;
356 }
357 if (do_wakeup) {
358 wake_up_interruptible_sync(PIPE_WAIT(*inode));
359 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
360 do_wakeup = 0;
361 }
362 PIPE_WAITING_WRITERS(*inode)++;
363 pipe_wait(inode);
364 PIPE_WAITING_WRITERS(*inode)--;
365 }
366out:
1b1dcc1b 367 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
368 if (do_wakeup) {
369 wake_up_interruptible(PIPE_WAIT(*inode));
370 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
371 }
372 if (ret > 0)
870f4817 373 file_update_time(filp);
1da177e4
LT
374 return ret;
375}
376
377static ssize_t
378pipe_write(struct file *filp, const char __user *buf,
379 size_t count, loff_t *ppos)
380{
381 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
382 return pipe_writev(filp, &iov, 1, ppos);
383}
384
385static ssize_t
386bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
387{
388 return -EBADF;
389}
390
391static ssize_t
392bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
393{
394 return -EBADF;
395}
396
397static int
398pipe_ioctl(struct inode *pino, struct file *filp,
399 unsigned int cmd, unsigned long arg)
400{
401 struct inode *inode = filp->f_dentry->d_inode;
402 struct pipe_inode_info *info;
403 int count, buf, nrbufs;
404
405 switch (cmd) {
406 case FIONREAD:
1b1dcc1b 407 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
408 info = inode->i_pipe;
409 count = 0;
410 buf = info->curbuf;
411 nrbufs = info->nrbufs;
412 while (--nrbufs >= 0) {
413 count += info->bufs[buf].len;
414 buf = (buf+1) & (PIPE_BUFFERS-1);
415 }
1b1dcc1b 416 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
417 return put_user(count, (int __user *)arg);
418 default:
419 return -EINVAL;
420 }
421}
422
423/* No kernel lock held - fine */
424static unsigned int
425pipe_poll(struct file *filp, poll_table *wait)
426{
427 unsigned int mask;
428 struct inode *inode = filp->f_dentry->d_inode;
429 struct pipe_inode_info *info = inode->i_pipe;
430 int nrbufs;
431
432 poll_wait(filp, PIPE_WAIT(*inode), wait);
433
434 /* Reading only -- no need for acquiring the semaphore. */
435 nrbufs = info->nrbufs;
436 mask = 0;
437 if (filp->f_mode & FMODE_READ) {
438 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
439 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
440 mask |= POLLHUP;
441 }
442
443 if (filp->f_mode & FMODE_WRITE) {
444 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
5e5d7a22
PE
445 /*
446 * Most Unices do not set POLLERR for FIFOs but on Linux they
447 * behave exactly like pipes for poll().
448 */
1da177e4
LT
449 if (!PIPE_READERS(*inode))
450 mask |= POLLERR;
451 }
452
453 return mask;
454}
455
1da177e4
LT
456static int
457pipe_release(struct inode *inode, int decr, int decw)
458{
1b1dcc1b 459 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
460 PIPE_READERS(*inode) -= decr;
461 PIPE_WRITERS(*inode) -= decw;
462 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
463 free_pipe_info(inode);
464 } else {
465 wake_up_interruptible(PIPE_WAIT(*inode));
466 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
467 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
468 }
1b1dcc1b 469 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
470
471 return 0;
472}
473
474static int
475pipe_read_fasync(int fd, struct file *filp, int on)
476{
477 struct inode *inode = filp->f_dentry->d_inode;
478 int retval;
479
1b1dcc1b 480 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 481 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
1b1dcc1b 482 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
483
484 if (retval < 0)
485 return retval;
486
487 return 0;
488}
489
490
491static int
492pipe_write_fasync(int fd, struct file *filp, int on)
493{
494 struct inode *inode = filp->f_dentry->d_inode;
495 int retval;
496
1b1dcc1b 497 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 498 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
1b1dcc1b 499 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
500
501 if (retval < 0)
502 return retval;
503
504 return 0;
505}
506
507
508static int
509pipe_rdwr_fasync(int fd, struct file *filp, int on)
510{
511 struct inode *inode = filp->f_dentry->d_inode;
512 int retval;
513
1b1dcc1b 514 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
515
516 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
517
518 if (retval >= 0)
519 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
520
1b1dcc1b 521 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
522
523 if (retval < 0)
524 return retval;
525
526 return 0;
527}
528
529
/*
 * pipe_read_release - release a read-only pipe end: remove the file
 * from the reader fasync list, then drop one reader.
 */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_read_fasync(-1, filp, 0);
	rc = pipe_release(inode, 1, 0);

	return rc;
}
536
/*
 * pipe_write_release - release a write-only pipe end: remove the file
 * from the writer fasync list, then drop one writer.
 */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_write_fasync(-1, filp, 0);
	rc = pipe_release(inode, 0, 1);

	return rc;
}
543
544static int
545pipe_rdwr_release(struct inode *inode, struct file *filp)
546{
547 int decr, decw;
548
549 pipe_rdwr_fasync(-1, filp, 0);
550 decr = (filp->f_mode & FMODE_READ) != 0;
551 decw = (filp->f_mode & FMODE_WRITE) != 0;
552 return pipe_release(inode, decr, decw);
553}
554
555static int
556pipe_read_open(struct inode *inode, struct file *filp)
557{
558 /* We could have perhaps used atomic_t, but this and friends
559 below are the only places. So it doesn't seem worthwhile. */
1b1dcc1b 560 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 561 PIPE_READERS(*inode)++;
1b1dcc1b 562 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
563
564 return 0;
565}
566
567static int
568pipe_write_open(struct inode *inode, struct file *filp)
569{
1b1dcc1b 570 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 571 PIPE_WRITERS(*inode)++;
1b1dcc1b 572 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
573
574 return 0;
575}
576
577static int
578pipe_rdwr_open(struct inode *inode, struct file *filp)
579{
1b1dcc1b 580 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
581 if (filp->f_mode & FMODE_READ)
582 PIPE_READERS(*inode)++;
583 if (filp->f_mode & FMODE_WRITE)
584 PIPE_WRITERS(*inode)++;
1b1dcc1b 585 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
586
587 return 0;
588}
589
590/*
591 * The file_operations structs are not static because they
592 * are also used in linux/fs/fifo.c to do operations on FIFOs.
593 */
4b6f5d20 594const struct file_operations read_fifo_fops = {
1da177e4
LT
595 .llseek = no_llseek,
596 .read = pipe_read,
597 .readv = pipe_readv,
598 .write = bad_pipe_w,
5e5d7a22 599 .poll = pipe_poll,
1da177e4
LT
600 .ioctl = pipe_ioctl,
601 .open = pipe_read_open,
602 .release = pipe_read_release,
603 .fasync = pipe_read_fasync,
604};
605
4b6f5d20 606const struct file_operations write_fifo_fops = {
1da177e4
LT
607 .llseek = no_llseek,
608 .read = bad_pipe_r,
609 .write = pipe_write,
610 .writev = pipe_writev,
5e5d7a22 611 .poll = pipe_poll,
1da177e4
LT
612 .ioctl = pipe_ioctl,
613 .open = pipe_write_open,
614 .release = pipe_write_release,
615 .fasync = pipe_write_fasync,
616};
617
4b6f5d20 618const struct file_operations rdwr_fifo_fops = {
1da177e4
LT
619 .llseek = no_llseek,
620 .read = pipe_read,
621 .readv = pipe_readv,
622 .write = pipe_write,
623 .writev = pipe_writev,
5e5d7a22 624 .poll = pipe_poll,
1da177e4
LT
625 .ioctl = pipe_ioctl,
626 .open = pipe_rdwr_open,
627 .release = pipe_rdwr_release,
628 .fasync = pipe_rdwr_fasync,
629};
630
a19cbd4b 631static struct file_operations read_pipe_fops = {
1da177e4
LT
632 .llseek = no_llseek,
633 .read = pipe_read,
634 .readv = pipe_readv,
635 .write = bad_pipe_w,
636 .poll = pipe_poll,
637 .ioctl = pipe_ioctl,
638 .open = pipe_read_open,
639 .release = pipe_read_release,
640 .fasync = pipe_read_fasync,
641};
642
a19cbd4b 643static struct file_operations write_pipe_fops = {
1da177e4
LT
644 .llseek = no_llseek,
645 .read = bad_pipe_r,
646 .write = pipe_write,
647 .writev = pipe_writev,
648 .poll = pipe_poll,
649 .ioctl = pipe_ioctl,
650 .open = pipe_write_open,
651 .release = pipe_write_release,
652 .fasync = pipe_write_fasync,
653};
654
a19cbd4b 655static struct file_operations rdwr_pipe_fops = {
1da177e4
LT
656 .llseek = no_llseek,
657 .read = pipe_read,
658 .readv = pipe_readv,
659 .write = pipe_write,
660 .writev = pipe_writev,
661 .poll = pipe_poll,
662 .ioctl = pipe_ioctl,
663 .open = pipe_rdwr_open,
664 .release = pipe_rdwr_release,
665 .fasync = pipe_rdwr_fasync,
666};
667
668void free_pipe_info(struct inode *inode)
669{
670 int i;
671 struct pipe_inode_info *info = inode->i_pipe;
672
673 inode->i_pipe = NULL;
674 for (i = 0; i < PIPE_BUFFERS; i++) {
675 struct pipe_buffer *buf = info->bufs + i;
676 if (buf->ops)
677 buf->ops->release(info, buf);
678 }
679 if (info->tmp_page)
680 __free_page(info->tmp_page);
681 kfree(info);
682}
683
684struct inode* pipe_new(struct inode* inode)
685{
686 struct pipe_inode_info *info;
687
11b0b5ab 688 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
1da177e4
LT
689 if (!info)
690 goto fail_page;
1da177e4
LT
691 inode->i_pipe = info;
692
693 init_waitqueue_head(PIPE_WAIT(*inode));
694 PIPE_RCOUNTER(*inode) = PIPE_WCOUNTER(*inode) = 1;
695
696 return inode;
697fail_page:
698 return NULL;
699}
700
fa3536cc 701static struct vfsmount *pipe_mnt __read_mostly;
1da177e4
LT
702static int pipefs_delete_dentry(struct dentry *dentry)
703{
704 return 1;
705}
706static struct dentry_operations pipefs_dentry_operations = {
707 .d_delete = pipefs_delete_dentry,
708};
709
710static struct inode * get_pipe_inode(void)
711{
712 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
713
714 if (!inode)
715 goto fail_inode;
716
717 if(!pipe_new(inode))
718 goto fail_iput;
719 PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
720 inode->i_fop = &rdwr_pipe_fops;
721
722 /*
723 * Mark the inode dirty from the very beginning,
724 * that way it will never be moved to the dirty
725 * list because "mark_inode_dirty()" will think
726 * that it already _is_ on the dirty list.
727 */
728 inode->i_state = I_DIRTY;
729 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
730 inode->i_uid = current->fsuid;
731 inode->i_gid = current->fsgid;
732 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
733 inode->i_blksize = PAGE_SIZE;
734 return inode;
735
736fail_iput:
737 iput(inode);
738fail_inode:
739 return NULL;
740}
741
742int do_pipe(int *fd)
743{
744 struct qstr this;
745 char name[32];
746 struct dentry *dentry;
747 struct inode * inode;
748 struct file *f1, *f2;
749 int error;
750 int i,j;
751
752 error = -ENFILE;
753 f1 = get_empty_filp();
754 if (!f1)
755 goto no_files;
756
757 f2 = get_empty_filp();
758 if (!f2)
759 goto close_f1;
760
761 inode = get_pipe_inode();
762 if (!inode)
763 goto close_f12;
764
765 error = get_unused_fd();
766 if (error < 0)
767 goto close_f12_inode;
768 i = error;
769
770 error = get_unused_fd();
771 if (error < 0)
772 goto close_f12_inode_i;
773 j = error;
774
775 error = -ENOMEM;
776 sprintf(name, "[%lu]", inode->i_ino);
777 this.name = name;
778 this.len = strlen(name);
779 this.hash = inode->i_ino; /* will go */
780 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
781 if (!dentry)
782 goto close_f12_inode_i_j;
783 dentry->d_op = &pipefs_dentry_operations;
784 d_add(dentry, inode);
785 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
786 f1->f_dentry = f2->f_dentry = dget(dentry);
787 f1->f_mapping = f2->f_mapping = inode->i_mapping;
788
789 /* read file */
790 f1->f_pos = f2->f_pos = 0;
791 f1->f_flags = O_RDONLY;
792 f1->f_op = &read_pipe_fops;
793 f1->f_mode = FMODE_READ;
794 f1->f_version = 0;
795
796 /* write file */
797 f2->f_flags = O_WRONLY;
798 f2->f_op = &write_pipe_fops;
799 f2->f_mode = FMODE_WRITE;
800 f2->f_version = 0;
801
802 fd_install(i, f1);
803 fd_install(j, f2);
804 fd[0] = i;
805 fd[1] = j;
806 return 0;
807
808close_f12_inode_i_j:
809 put_unused_fd(j);
810close_f12_inode_i:
811 put_unused_fd(i);
812close_f12_inode:
813 free_pipe_info(inode);
814 iput(inode);
815close_f12:
816 put_filp(f2);
817close_f1:
818 put_filp(f1);
819no_files:
820 return error;
821}
822
823/*
824 * pipefs should _never_ be mounted by userland - too much of security hassle,
825 * no real gain from having the whole whorehouse mounted. So we don't need
826 * any operations on the root directory. However, we need a non-trivial
827 * d_name - pipe: will go nicely and kill the special-casing in procfs.
828 */
829
830static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
831 int flags, const char *dev_name, void *data)
832{
833 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
834}
835
836static struct file_system_type pipe_fs_type = {
837 .name = "pipefs",
838 .get_sb = pipefs_get_sb,
839 .kill_sb = kill_anon_super,
840};
841
842static int __init init_pipe_fs(void)
843{
844 int err = register_filesystem(&pipe_fs_type);
845 if (!err) {
846 pipe_mnt = kern_mount(&pipe_fs_type);
847 if (IS_ERR(pipe_mnt)) {
848 err = PTR_ERR(pipe_mnt);
849 unregister_filesystem(&pipe_fs_type);
850 }
851 }
852 return err;
853}
854
855static void __exit exit_pipe_fs(void)
856{
857 unregister_filesystem(&pipe_fs_type);
858 mntput(pipe_mnt);
859}
860
/* Hook pipefs setup and teardown into kernel init and module exit. */
fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);