]> bbs.cooldavid.org Git - net-next-2.6.git/blame - fs/pipe.c
[PATCH] splice: add optional input and output offsets
[net-next-2.6.git] / fs / pipe.c
CommitLineData
1da177e4
LT
1/*
2 * linux/fs/pipe.c
3 *
4 * Copyright (C) 1991, 1992, 1999 Linus Torvalds
5 */
6
7#include <linux/mm.h>
8#include <linux/file.h>
9#include <linux/poll.h>
10#include <linux/slab.h>
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/fs.h>
14#include <linux/mount.h>
15#include <linux/pipe_fs_i.h>
16#include <linux/uio.h>
17#include <linux/highmem.h>
5274f052 18#include <linux/pagemap.h>
1da177e4
LT
19
20#include <asm/uaccess.h>
21#include <asm/ioctls.h>
22
23/*
24 * We use a start+len construction, which provides full use of the
25 * allocated memory.
26 * -- Florian Coosmann (FGC)
27 *
28 * Reads with count = 0 should always return 0.
29 * -- Julian Bradfield 1999-06-07.
30 *
31 * FIFOs and Pipes now generate SIGIO for both readers and writers.
32 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
33 *
34 * pipe_read & write cleanup
35 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
36 */
37
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
	DEFINE_WAIT(wait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE|TASK_NONINTERACTIVE);
	/*
	 * Release i_mutex (if this pipe is inode-backed) before sleeping
	 * so the peer end can make progress; it is re-taken after wakeup.
	 * prepare_to_wait() is done first so a wakeup between the unlock
	 * and schedule() is not lost.
	 */
	if (pipe->inode)
		mutex_unlock(&pipe->inode->i_mutex);
	schedule();
	finish_wait(&pipe->wait, &wait);
	if (pipe->inode)
		mutex_lock(&pipe->inode->i_mutex);
}
55
858119e1 56static int
1da177e4
LT
57pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
58{
59 unsigned long copy;
60
61 while (len > 0) {
62 while (!iov->iov_len)
63 iov++;
64 copy = min_t(unsigned long, len, iov->iov_len);
65
66 if (copy_from_user(to, iov->iov_base, copy))
67 return -EFAULT;
68 to += copy;
69 len -= copy;
70 iov->iov_base += copy;
71 iov->iov_len -= copy;
72 }
73 return 0;
74}
75
858119e1 76static int
1da177e4
LT
77pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
78{
79 unsigned long copy;
80
81 while (len > 0) {
82 while (!iov->iov_len)
83 iov++;
84 copy = min_t(unsigned long, len, iov->iov_len);
85
86 if (copy_to_user(iov->iov_base, from, copy))
87 return -EFAULT;
88 from += copy;
89 len -= copy;
90 iov->iov_base += copy;
91 iov->iov_len -= copy;
92 }
93 return 0;
94}
95
96static void anon_pipe_buf_release(struct pipe_inode_info *info, struct pipe_buffer *buf)
97{
98 struct page *page = buf->page;
99
3e7ee3e7
JA
100 buf->flags &= ~PIPE_BUF_FLAG_STOLEN;
101
5274f052
JA
102 /*
103 * If nobody else uses this page, and we don't already have a
104 * temporary page, let's keep track of it as a one-deep
105 * allocation cache
106 */
107 if (page_count(page) == 1 && !info->tmp_page) {
108 info->tmp_page = page;
1da177e4
LT
109 return;
110 }
5274f052
JA
111
112 /*
113 * Otherwise just release our reference to it
114 */
115 page_cache_release(page);
1da177e4
LT
116}
117
118static void *anon_pipe_buf_map(struct file *file, struct pipe_inode_info *info, struct pipe_buffer *buf)
119{
120 return kmap(buf->page);
121}
122
123static void anon_pipe_buf_unmap(struct pipe_inode_info *info, struct pipe_buffer *buf)
124{
125 kunmap(buf->page);
126}
127
5abc97aa
JA
128static int anon_pipe_buf_steal(struct pipe_inode_info *info,
129 struct pipe_buffer *buf)
130{
3e7ee3e7 131 buf->flags |= PIPE_BUF_FLAG_STOLEN;
5abc97aa
JA
132 return 0;
133}
134
1da177e4
LT
/*
 * Buffer operations for ordinary (anonymous) pipe pages.  can_merge
 * lets pipe_writev() append a small write to the last partially
 * filled page instead of consuming a fresh buffer slot.
 */
static struct pipe_buf_operations anon_pipe_buf_ops = {
	.can_merge = 1,
	.map = anon_pipe_buf_map,
	.unmap = anon_pipe_buf_unmap,
	.release = anon_pipe_buf_release,
	.steal = anon_pipe_buf_steal,
};
142
/*
 * Vectored read from a pipe.  Copies data out of the circular buffer
 * ring under the pipe mutex, sleeping (via pipe_wait) when the pipe is
 * empty but writers remain.  Returns the number of bytes read, 0 on
 * EOF (no writers), or a negative errno.
 */
static ssize_t
pipe_readv(struct file *filp, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(PIPE_MUTEX(*inode));
	info = inode->i_pipe;
	for (;;) {
		int bufs = info->nrbufs;
		if (bufs) {
			/* Consume from the oldest buffer in the ring. */
			int curbuf = info->curbuf;
			struct pipe_buffer *buf = info->bufs + curbuf;
			struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error;

			if (chars > total_len)
				chars = total_len;

			addr = ops->map(filp, info, buf);
			if (IS_ERR(addr)) {
				/* Report the map failure only if nothing was read yet. */
				if (!ret)
					ret = PTR_ERR(addr);
				break;
			}
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
			ops->unmap(info, buf);
			if (unlikely(error)) {
				if (!ret) ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				/* Buffer drained: release it and advance the ring. */
				buf->ops = NULL;
				ops->release(info, buf);
				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
				info->curbuf = curbuf;
				info->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		/* Empty pipe and no writers left: EOF (ret may be 0). */
		if (!PIPE_WRITERS(*inode))
			break;
		if (!PIPE_WAITING_WRITERS(*inode)) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret) ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			/* Tell writers we freed buffer space before sleeping. */
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
		}
		pipe_wait(inode->i_pipe);
	}
	mutex_unlock(PIPE_MUTEX(*inode));
	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}
240
241static ssize_t
242pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
243{
244 struct iovec iov = { .iov_base = buf, .iov_len = count };
245 return pipe_readv(filp, &iov, 1, ppos);
246}
247
/*
 * Vectored write to a pipe.  First tries to merge a small tail write
 * into the last partially filled buffer, then fills fresh pages,
 * sleeping (via pipe_wait) when the ring is full.  Raises SIGPIPE and
 * returns -EPIPE when no readers remain.  Returns bytes written or a
 * negative errno.
 */
static ssize_t
pipe_writev(struct file *filp, const struct iovec *_iov,
	    unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info;
	ssize_t ret;
	int do_wakeup;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;
	ssize_t chars;

	total_len = iov_length(iov, nr_segs);
	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(PIPE_MUTEX(*inode));
	info = inode->i_pipe;

	if (!PIPE_READERS(*inode)) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

	/* We try to merge small writes */
	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
	if (info->nrbufs && chars != 0) {
		int lastbuf = (info->curbuf + info->nrbufs - 1) & (PIPE_BUFFERS-1);
		struct pipe_buffer *buf = info->bufs + lastbuf;
		struct pipe_buf_operations *ops = buf->ops;
		int offset = buf->offset + buf->len;
		/* Only merge if the buffer type allows it and the tail fits. */
		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
			void *addr;
			int error;

			addr = ops->map(filp, info, buf);
			if (IS_ERR(addr)) {
				error = PTR_ERR(addr);
				goto out;
			}
			error = pipe_iov_copy_from_user(offset + addr, iov,
							chars);
			ops->unmap(info, buf);
			ret = error;
			do_wakeup = 1;
			if (error)
				goto out;
			buf->len += chars;
			total_len -= chars;
			ret = chars;
			if (!total_len)
				goto out;
		}
	}

	for (;;) {
		int bufs;
		/* Re-check on every iteration: readers may vanish while we sleep. */
		if (!PIPE_READERS(*inode)) {
			send_sig(SIGPIPE, current, 0);
			if (!ret) ret = -EPIPE;
			break;
		}
		bufs = info->nrbufs;
		if (bufs < PIPE_BUFFERS) {
			int newbuf = (info->curbuf + bufs) & (PIPE_BUFFERS-1);
			struct pipe_buffer *buf = info->bufs + newbuf;
			struct page *page = info->tmp_page;
			int error;

			/* Use the cached page if there is one, else allocate. */
			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				info->tmp_page = page;
			}
			/* Always wakeup, even if the copy fails. Otherwise
			 * we lock up (O_NONBLOCK-)readers that sleep due to
			 * syscall merging.
			 * FIXME! Is this really true?
			 */
			do_wakeup = 1;
			chars = PAGE_SIZE;
			if (chars > total_len)
				chars = total_len;

			error = pipe_iov_copy_from_user(kmap(page), iov, chars);
			kunmap(page);
			if (unlikely(error)) {
				if (!ret) ret = -EFAULT;
				break;
			}
			ret += chars;

			/* Insert it into the buffer array */
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = chars;
			info->nrbufs = ++bufs;
			info->tmp_page = NULL;

			total_len -= chars;
			if (!total_len)
				break;
		}
		if (bufs < PIPE_BUFFERS)
			continue;
		/* Ring is full: fail, bail on a signal, or sleep for room. */
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret) ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret) ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
			do_wakeup = 0;
		}
		PIPE_WAITING_WRITERS(*inode)++;
		pipe_wait(inode->i_pipe);
		PIPE_WAITING_WRITERS(*inode)--;
	}
out:
	mutex_unlock(PIPE_MUTEX(*inode));
	/* Signal readers asynchronously that there is more data. */
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
	}
	if (ret > 0)
		file_update_time(filp);
	return ret;
}
388
389static ssize_t
390pipe_write(struct file *filp, const char __user *buf,
391 size_t count, loff_t *ppos)
392{
393 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
394 return pipe_writev(filp, &iov, 1, ppos);
395}
396
397static ssize_t
398bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
399{
400 return -EBADF;
401}
402
403static ssize_t
404bad_pipe_w(struct file *filp, const char __user *buf, size_t count, loff_t *ppos)
405{
406 return -EBADF;
407}
408
409static int
410pipe_ioctl(struct inode *pino, struct file *filp,
411 unsigned int cmd, unsigned long arg)
412{
413 struct inode *inode = filp->f_dentry->d_inode;
414 struct pipe_inode_info *info;
415 int count, buf, nrbufs;
416
417 switch (cmd) {
418 case FIONREAD:
1b1dcc1b 419 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
420 info = inode->i_pipe;
421 count = 0;
422 buf = info->curbuf;
423 nrbufs = info->nrbufs;
424 while (--nrbufs >= 0) {
425 count += info->bufs[buf].len;
426 buf = (buf+1) & (PIPE_BUFFERS-1);
427 }
1b1dcc1b 428 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
429 return put_user(count, (int __user *)arg);
430 default:
431 return -EINVAL;
432 }
433}
434
435/* No kernel lock held - fine */
436static unsigned int
437pipe_poll(struct file *filp, poll_table *wait)
438{
439 unsigned int mask;
440 struct inode *inode = filp->f_dentry->d_inode;
441 struct pipe_inode_info *info = inode->i_pipe;
442 int nrbufs;
443
444 poll_wait(filp, PIPE_WAIT(*inode), wait);
445
446 /* Reading only -- no need for acquiring the semaphore. */
447 nrbufs = info->nrbufs;
448 mask = 0;
449 if (filp->f_mode & FMODE_READ) {
450 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
451 if (!PIPE_WRITERS(*inode) && filp->f_version != PIPE_WCOUNTER(*inode))
452 mask |= POLLHUP;
453 }
454
455 if (filp->f_mode & FMODE_WRITE) {
456 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
5e5d7a22
PE
457 /*
458 * Most Unices do not set POLLERR for FIFOs but on Linux they
459 * behave exactly like pipes for poll().
460 */
1da177e4
LT
461 if (!PIPE_READERS(*inode))
462 mask |= POLLERR;
463 }
464
465 return mask;
466}
467
1da177e4
LT
468static int
469pipe_release(struct inode *inode, int decr, int decw)
470{
1b1dcc1b 471 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
472 PIPE_READERS(*inode) -= decr;
473 PIPE_WRITERS(*inode) -= decw;
474 if (!PIPE_READERS(*inode) && !PIPE_WRITERS(*inode)) {
475 free_pipe_info(inode);
476 } else {
477 wake_up_interruptible(PIPE_WAIT(*inode));
478 kill_fasync(PIPE_FASYNC_READERS(*inode), SIGIO, POLL_IN);
479 kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
480 }
1b1dcc1b 481 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
482
483 return 0;
484}
485
486static int
487pipe_read_fasync(int fd, struct file *filp, int on)
488{
489 struct inode *inode = filp->f_dentry->d_inode;
490 int retval;
491
1b1dcc1b 492 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 493 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
1b1dcc1b 494 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
495
496 if (retval < 0)
497 return retval;
498
499 return 0;
500}
501
502
503static int
504pipe_write_fasync(int fd, struct file *filp, int on)
505{
506 struct inode *inode = filp->f_dentry->d_inode;
507 int retval;
508
1b1dcc1b 509 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 510 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
1b1dcc1b 511 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
512
513 if (retval < 0)
514 return retval;
515
516 return 0;
517}
518
519
520static int
521pipe_rdwr_fasync(int fd, struct file *filp, int on)
522{
523 struct inode *inode = filp->f_dentry->d_inode;
524 int retval;
525
1b1dcc1b 526 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
527
528 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_READERS(*inode));
529
530 if (retval >= 0)
531 retval = fasync_helper(fd, filp, on, PIPE_FASYNC_WRITERS(*inode));
532
1b1dcc1b 533 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
534
535 if (retval < 0)
536 return retval;
537
538 return 0;
539}
540
541
/* Release the read end: drop fasync registration, then one reader ref. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);
	return pipe_release(inode, 1, 0);
}
548
/* Release the write end: drop fasync registration, then one writer ref. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);
	return pipe_release(inode, 0, 1);
}
555
556static int
557pipe_rdwr_release(struct inode *inode, struct file *filp)
558{
559 int decr, decw;
560
561 pipe_rdwr_fasync(-1, filp, 0);
562 decr = (filp->f_mode & FMODE_READ) != 0;
563 decw = (filp->f_mode & FMODE_WRITE) != 0;
564 return pipe_release(inode, decr, decw);
565}
566
567static int
568pipe_read_open(struct inode *inode, struct file *filp)
569{
570 /* We could have perhaps used atomic_t, but this and friends
571 below are the only places. So it doesn't seem worthwhile. */
1b1dcc1b 572 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 573 PIPE_READERS(*inode)++;
1b1dcc1b 574 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
575
576 return 0;
577}
578
579static int
580pipe_write_open(struct inode *inode, struct file *filp)
581{
1b1dcc1b 582 mutex_lock(PIPE_MUTEX(*inode));
1da177e4 583 PIPE_WRITERS(*inode)++;
1b1dcc1b 584 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
585
586 return 0;
587}
588
589static int
590pipe_rdwr_open(struct inode *inode, struct file *filp)
591{
1b1dcc1b 592 mutex_lock(PIPE_MUTEX(*inode));
1da177e4
LT
593 if (filp->f_mode & FMODE_READ)
594 PIPE_READERS(*inode)++;
595 if (filp->f_mode & FMODE_WRITE)
596 PIPE_WRITERS(*inode)++;
1b1dcc1b 597 mutex_unlock(PIPE_MUTEX(*inode));
1da177e4
LT
598
599 return 0;
600}
601
/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */

/* FIFO opened read-only: writes fail with -EBADF. */
const struct file_operations read_fifo_fops = {
	.llseek = no_llseek,
	.read = pipe_read,
	.readv = pipe_readv,
	.write = bad_pipe_w,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_read_open,
	.release = pipe_read_release,
	.fasync = pipe_read_fasync,
};

/* FIFO opened write-only: reads fail with -EBADF. */
const struct file_operations write_fifo_fops = {
	.llseek = no_llseek,
	.read = bad_pipe_r,
	.write = pipe_write,
	.writev = pipe_writev,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_write_open,
	.release = pipe_write_release,
	.fasync = pipe_write_fasync,
};

/* FIFO opened read-write. */
const struct file_operations rdwr_fifo_fops = {
	.llseek = no_llseek,
	.read = pipe_read,
	.readv = pipe_readv,
	.write = pipe_write,
	.writev = pipe_writev,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_rdwr_open,
	.release = pipe_rdwr_release,
	.fasync = pipe_rdwr_fasync,
};
642
/* Anonymous pipe (pipe(2)) read end. */
static struct file_operations read_pipe_fops = {
	.llseek = no_llseek,
	.read = pipe_read,
	.readv = pipe_readv,
	.write = bad_pipe_w,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_read_open,
	.release = pipe_read_release,
	.fasync = pipe_read_fasync,
};

/* Anonymous pipe (pipe(2)) write end. */
static struct file_operations write_pipe_fops = {
	.llseek = no_llseek,
	.read = bad_pipe_r,
	.write = pipe_write,
	.writev = pipe_writev,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_write_open,
	.release = pipe_write_release,
	.fasync = pipe_write_fasync,
};

/* Read-write variant; installed as the pipe inode's default i_fop. */
static struct file_operations rdwr_pipe_fops = {
	.llseek = no_llseek,
	.read = pipe_read,
	.readv = pipe_readv,
	.write = pipe_write,
	.writev = pipe_writev,
	.poll = pipe_poll,
	.ioctl = pipe_ioctl,
	.open = pipe_rdwr_open,
	.release = pipe_rdwr_release,
	.fasync = pipe_rdwr_fasync,
};
679
3a326a2c
IM
680struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
681{
682 struct pipe_inode_info *info;
683
684 info = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
685 if (info) {
686 init_waitqueue_head(&info->wait);
687 info->r_counter = info->w_counter = 1;
688 info->inode = inode;
689 }
690
691 return info;
692}
693
1da177e4
LT
694void free_pipe_info(struct inode *inode)
695{
696 int i;
697 struct pipe_inode_info *info = inode->i_pipe;
698
699 inode->i_pipe = NULL;
700 for (i = 0; i < PIPE_BUFFERS; i++) {
701 struct pipe_buffer *buf = info->bufs + i;
702 if (buf->ops)
703 buf->ops->release(info, buf);
704 }
705 if (info->tmp_page)
706 __free_page(info->tmp_page);
707 kfree(info);
708}
709
fa3536cc 710static struct vfsmount *pipe_mnt __read_mostly;
1da177e4
LT
711static int pipefs_delete_dentry(struct dentry *dentry)
712{
713 return 1;
714}
715static struct dentry_operations pipefs_dentry_operations = {
716 .d_delete = pipefs_delete_dentry,
717};
718
/*
 * Allocate and initialize the backing inode for a new anonymous pipe.
 * Both the reader and writer counts start at 1 (one end each).
 * Returns NULL on failure.
 */
static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);

	if (!inode)
		goto fail_inode;

	inode->i_pipe = alloc_pipe_info(inode);
	if (!inode->i_pipe)
		goto fail_iput;

	PIPE_READERS(*inode) = PIPE_WRITERS(*inode) = 1;
	inode->i_fop = &rdwr_pipe_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_blksize = PAGE_SIZE;
	return inode;

fail_iput:
	iput(inode);
fail_inode:
	return NULL;
}
752
/*
 * Create an anonymous pipe: one inode, two struct files (read and
 * write ends) and two fds.  On success the fd numbers are returned
 * through fd[0] (read end) and fd[1] (write end).  Returns 0 or a
 * negative errno; on failure all partially acquired resources are
 * released.
 */
int do_pipe(int *fd)
{
	struct qstr this;
	char name[32];
	struct dentry *dentry;
	struct inode * inode;
	struct file *f1, *f2;
	int error;
	int i,j;

	error = -ENFILE;
	f1 = get_empty_filp();
	if (!f1)
		goto no_files;

	f2 = get_empty_filp();
	if (!f2)
		goto close_f1;

	inode = get_pipe_inode();
	if (!inode)
		goto close_f12;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode;
	i = error;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode_i;
	j = error;

	error = -ENOMEM;
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
	if (!dentry)
		goto close_f12_inode_i_j;
	dentry->d_op = &pipefs_dentry_operations;
	d_add(dentry, inode);
	/* Nested mntget(): one vfsmount reference for each struct file. */
	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
	f1->f_dentry = f2->f_dentry = dget(dentry);
	f1->f_mapping = f2->f_mapping = inode->i_mapping;

	/* read file */
	f1->f_pos = f2->f_pos = 0;
	f1->f_flags = O_RDONLY;
	f1->f_op = &read_pipe_fops;
	f1->f_mode = FMODE_READ;
	f1->f_version = 0;

	/* write file */
	f2->f_flags = O_WRONLY;
	f2->f_op = &write_pipe_fops;
	f2->f_mode = FMODE_WRITE;
	f2->f_version = 0;

	fd_install(i, f1);
	fd_install(j, f2);
	fd[0] = i;
	fd[1] = j;
	return 0;

	/* Error unwind: release resources in reverse order of acquisition. */
close_f12_inode_i_j:
	put_unused_fd(j);
close_f12_inode_i:
	put_unused_fd(i);
close_f12_inode:
	free_pipe_info(inode);
	iput(inode);
close_f12:
	put_filp(f2);
close_f1:
	put_filp(f1);
no_files:
	return error;
}
833
834/*
835 * pipefs should _never_ be mounted by userland - too much of security hassle,
836 * no real gain from having the whole whorehouse mounted. So we don't need
837 * any operations on the root directory. However, we need a non-trivial
838 * d_name - pipe: will go nicely and kill the special-casing in procfs.
839 */
840
841static struct super_block *pipefs_get_sb(struct file_system_type *fs_type,
842 int flags, const char *dev_name, void *data)
843{
844 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
845}
846
/* Kernel-internal filesystem backing anonymous pipe inodes; never
 * mounted by userland (see the comment above pipefs_get_sb). */
static struct file_system_type pipe_fs_type = {
	.name = "pipefs",
	.get_sb = pipefs_get_sb,
	.kill_sb = kill_anon_super,
};
852
853static int __init init_pipe_fs(void)
854{
855 int err = register_filesystem(&pipe_fs_type);
856 if (!err) {
857 pipe_mnt = kern_mount(&pipe_fs_type);
858 if (IS_ERR(pipe_mnt)) {
859 err = PTR_ERR(pipe_mnt);
860 unregister_filesystem(&pipe_fs_type);
861 }
862 }
863 return err;
864}
865
866static void __exit exit_pipe_fs(void)
867{
868 unregister_filesystem(&pipe_fs_type);
869 mntput(pipe_mnt);
870}
871
872fs_initcall(init_pipe_fs);
873module_exit(exit_pipe_fs);