[net-next-2.6.git] / mm / filemap_xip.c

/*
 *	linux/mm/filemap_xip.c
 *
 * Copyright (C) 2005 IBM Corporation
 * Author: Carsten Otte <cotte@de.ibm.com>
 *
 * derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
 *
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/uio.h>
#include <linux/rmap.h>
#include <linux/sched.h>
#include <asm/tlbflush.h>
#include "filemap.h"

/*
 * We do use our own empty page to avoid interference with other users
 * of ZERO_PAGE(), such as /dev/zero
 */
static struct page *__xip_sparse_page;

static struct page *xip_sparse_page(void)
{
	if (!__xip_sparse_page) {
		unsigned long zeroes = get_zeroed_page(GFP_HIGHUSER);
		if (zeroes) {
			static DEFINE_SPINLOCK(xip_alloc_lock);
			spin_lock(&xip_alloc_lock);
			if (!__xip_sparse_page)
				__xip_sparse_page = virt_to_page(zeroes);
			else
				free_page(zeroes);
			spin_unlock(&xip_alloc_lock);
		}
	}
	return __xip_sparse_page;
}

/*
 * Read routine for execute-in-place files.
 *
 * Starting at *ppos, the file is walked one page at a time.  Each page is
 * obtained through mapping->a_ops->get_xip_page() and handed to @actor,
 * until the descriptor is satisfied, the end of the file is reached or an
 * error occurs.  Errors are reported in desc->error; *ppos is always
 * updated to the position reached.
 *
 * @_ra is not used.  @filp is only passed to file_accessed() and may be
 * NULL.
 */
static void
do_xip_mapping_read(struct address_space *mapping,
		    struct file_ra_state *_ra,
		    struct file *filp,
		    loff_t *ppos,
		    read_descriptor_t *desc,
		    read_actor_t actor)
{
	struct inode *inode = mapping->host;
	unsigned long index, end_index, offset;
	loff_t isize;

	BUG_ON(!mapping->a_ops->get_xip_page);

	index = *ppos >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

	isize = i_size_read(inode);
	if (!isize)
		goto out;

	end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
	for (;;) {
		struct page *page;
		unsigned long nr, ret;

		/* Work out how many bytes of this page lie inside the file. */
		if (index > end_index)
			break;
		if (index == end_index) {
			/* Last page: only part of it may be valid. */
			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (nr <= offset)
				break;
		} else {
			nr = PAGE_CACHE_SIZE;
		}
		nr -= offset;

		page = mapping->a_ops->get_xip_page(mapping,
			index*(PAGE_SIZE/512), 0);
		if (!page) {
			/* Did not get the page. Report it */
			desc->error = -EIO;
			break;
		}
		if (unlikely(IS_ERR(page))) {
			if (PTR_ERR(page) != -ENODATA) {
				desc->error = PTR_ERR(page);
				break;
			}
			/* Sparse block: read zeroes. */
			page = ZERO_PAGE(0);
		}

		/*
		 * Users may be writing to this page through arbitrary
		 * virtual addresses; deal with potential aliasing before
		 * the kernel reads it.
		 */
		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		/*
		 * Hand the page to the actor.  Its return value is the
		 * number of bytes it consumed, which need not match how much
		 * of the user buffer was filled, so only the file position
		 * is advanced here; the actor maintains the buffer pointer
		 * and remaining count in the descriptor.
		 */
		ret = actor(desc, page, offset, nr);
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;

		/* Stop on a short copy or once the request is satisfied. */
		if (ret != nr || !desc->count)
			break;
	}

out:
	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
	if (filp)
		file_accessed(filp);
}

/*
 * read() for execute-in-place files.
 *
 * Checks that the user buffer is writable, sets up a read descriptor for
 * @len bytes into @buf and runs do_xip_mapping_read() with
 * file_read_actor.  Returns the number of bytes written to the buffer, or
 * the descriptor's error code if nothing was written.
 */
ssize_t
xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	read_descriptor_t desc;

	if (!access_ok(VERIFY_WRITE, buf, len))
		return -EFAULT;

	desc.arg.buf = buf;
	desc.count = len;
	desc.written = 0;
	desc.error = 0;

	do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
			    ppos, &desc, file_read_actor);

	/* A partial read takes precedence over a late error. */
	if (!desc.written)
		return desc.error;
	return desc.written;
}
EXPORT_SYMBOL_GPL(xip_file_read);

/*
 * __xip_unmap is invoked from xip_file_nopage() and __xip_file_write()
 * after get_xip_page() has been asked to create a block for pgoff.
 *
 * This function walks all vmas of the address_space that cover pgoff and
 * unmaps the __xip_sparse_page when found at pgoff.
 *
 * It returns immediately if the sparse page has never been allocated.
 */
static void
__xip_unmap (struct address_space * mapping,
		     unsigned long pgoff)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	struct prio_tree_iter iter;
	unsigned long address;
	pte_t *pte;
	pte_t pteval;
	spinlock_t *ptl;
	struct page *page;

	/* Nothing can be mapped if the sparse page was never set up. */
	page = __xip_sparse_page;
	if (!page)
		return;

	spin_lock(&mapping->i_mmap_lock);
	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		mm = vma->vm_mm;
		/* User virtual address at which this vma maps file page pgoff. */
		address = vma->vm_start +
			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
		/*
		 * NOTE(review): page_check_address() appears to return the
		 * pte only when it maps 'page', with *ptl held; the lock is
		 * released by pte_unmap_unlock() below - confirm.
		 */
		pte = page_check_address(page, mm, address, &ptl);
		if (pte) {
			/* Nuke the page table entry. */
			flush_cache_page(vma, address, pte_pfn(*pte));
			pteval = ptep_clear_flush(vma, address, pte);
			page_remove_rmap(page, vma);
			dec_mm_counter(mm, file_rss);
			/*
			 * NOTE(review): presumably the sparse page is never
			 * expected to be written through a user mapping,
			 * hence a dirty pte is treated as a bug - confirm.
			 */
			BUG_ON(pte_dirty(pteval));
			pte_unmap_unlock(pte, ptl);
			/*
			 * Presumably drops the reference taken by
			 * page_cache_get() in xip_file_nopage() when the
			 * sparse page was handed out for this mapping.
			 */
			page_cache_release(page);
		}
	}
	spin_unlock(&mapping->i_mmap_lock);
}

/*
 * xip_file_nopage() is invoked via the vma operations vector for a
 * mapped memory region to provide file data during a page fault.
 *
 * This function is derived from filemap_nopage, but used for execute in place
 *
 * The faulting page offset is looked up through get_xip_page().  If the
 * block is sparse (-ENODATA), a mapping that may be shared and writable on
 * a filesystem not mounted read-only gets a newly created block, and the
 * sparse page is unmapped at that offset via __xip_unmap(); any other
 * mapping is given the shared sparse page.
 *
 * Returns the page with an extra reference taken by page_cache_get(),
 * NOPAGE_SIGBUS if the offset lies beyond the file size or the lookup
 * fails, or NOPAGE_OOM if the sparse page cannot be allocated.
 * @type is not written by this function.
 */
static struct page *
xip_file_nopage(struct vm_area_struct * area,
		   unsigned long address,
		   int *type)
{
	struct file *file = area->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct page *page;
	unsigned long size, pgoff;

	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
		+ area->vm_pgoff;

	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (pgoff >= size)
		return NOPAGE_SIGBUS;

	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
	/*
	 * The read and truncate paths in this file treat a NULL page as a
	 * failure; do the same here instead of dereferencing it below.
	 */
	if (!page)
		return NOPAGE_SIGBUS;
	if (!IS_ERR(page))
		goto out;
	if (PTR_ERR(page) != -ENODATA)
		return NOPAGE_SIGBUS;

	/* sparse block */
	if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
	    (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) &&
	    (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
		/* maybe shared writable, allocate new block */
		page = mapping->a_ops->get_xip_page (mapping,
			pgoff*(PAGE_SIZE/512), 1);
		if (!page || IS_ERR(page))
			return NOPAGE_SIGBUS;
		/* unmap page at pgoff from all other vmas */
		__xip_unmap(mapping, pgoff);
	} else {
		/* not shared and writable, use xip_sparse_page() */
		page = xip_sparse_page();
		if (!page)
			return NOPAGE_OOM;
	}

out:
	page_cache_get(page);
	return page;
}

/*
 * vm operations installed on XIP file mappings by xip_file_mmap(); only
 * the ->nopage handler is provided.
 */
static struct vm_operations_struct xip_file_vm_ops = {
	.nopage         = xip_file_nopage,
};

int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	BUG_ON(!file->f_mapping->a_ops->get_xip_page);

	file_accessed(file);
	vma->vm_ops = &xip_file_vm_ops;
	return 0;
}
EXPORT_SYMBOL_GPL(xip_file_mmap);

/*
 * Copy @count bytes from the user buffer @buf into the XIP file behind
 * @filp, starting at file position @pos, one page at a time.
 *
 * Pages are obtained through get_xip_page().  For a sparse block
 * (-ENODATA) the block is created and the sparse page is unmapped at that
 * offset via __xip_unmap().
 *
 * On return *ppos holds the position reached and i_size is extended if the
 * write went past it.  Returns the number of bytes written, or, if nothing
 * was written, a negative error code (0 when @count is 0).
 */
static ssize_t
__xip_file_write(struct file *filp, const char __user *buf,
		  size_t count, loff_t pos, loff_t *ppos)
{
	struct address_space * mapping = filp->f_mapping;
	const struct address_space_operations *a_ops = mapping->a_ops;
	struct inode 	*inode = mapping->host;
	long		status = 0;
	struct page	*page;
	size_t		bytes;
	ssize_t		written = 0;

	BUG_ON(!a_ops->get_xip_page);

	do {
		unsigned long index;
		unsigned long offset;
		size_t copied;

		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		index = pos >> PAGE_CACHE_SHIFT;
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count)
			bytes = count;

		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 */
		fault_in_pages_readable(buf, bytes);

		page = a_ops->get_xip_page(mapping,
					   index*(PAGE_SIZE/512), 0);
		if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) {
			/* sparse block: have a new one created */
			page = a_ops->get_xip_page(mapping,
						   index*(PAGE_SIZE/512), 1);
			if (page && !IS_ERR(page))
				/* unmap page at pgoff from all other vmas */
				__xip_unmap(mapping, index);
		}

		/*
		 * The read path in this file reports a NULL page as -EIO;
		 * do the same here rather than copying into a NULL page.
		 */
		if (!page) {
			status = -EIO;
			break;
		}
		if (IS_ERR(page)) {
			status = PTR_ERR(page);
			break;
		}

		copied = filemap_copy_from_user(page, offset, buf, bytes);
		flush_dcache_page(page);
		if (likely(copied > 0)) {
			written += copied;
			count -= copied;
			pos += copied;
			buf += copied;
		}
		/* A short copy means the user buffer faulted. */
		if (unlikely(copied != bytes)) {
			status = -EFAULT;
			break;
		}
	} while (count);
	*ppos = pos;
	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold i_mutex.
	 */
	if (pos > inode->i_size) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}

	return written ? written : status;
}

/*
 * write() for execute-in-place files.
 *
 * Runs under inode->i_mutex: checks that the user buffer is readable,
 * performs the generic write checks, removes suid, updates the file times
 * and then hands the data to __xip_file_write().
 *
 * Returns the result of __xip_file_write(), 0 if the write checks leave
 * nothing to write, or a negative error code.
 */
ssize_t
xip_file_write(struct file *filp, const char __user *buf, size_t len,
	       loff_t *ppos)
{
	struct address_space *mapping = filp->f_mapping;
	struct inode *inode = mapping->host;
	size_t count = len;
	loff_t pos;
	ssize_t ret;

	mutex_lock(&inode->i_mutex);

	if (!access_ok(VERIFY_READ, buf, len)) {
		ret = -EFAULT;
		goto out_up;
	}

	pos = *ppos;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	/* May adjust pos and count; a zero count leaves ret at 0. */
	ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode));
	if (ret || count == 0)
		goto out_backing;

	ret = remove_suid(filp->f_path.dentry);
	if (!ret) {
		file_update_time(filp);
		ret = __xip_file_write(filp, buf, count, pos, ppos);
	}

 out_backing:
	current->backing_dev_info = NULL;
 out_up:
	mutex_unlock(&inode->i_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(xip_file_write);

/*
 * Zero the tail of the block containing @from for an execute-in-place
 * file.  This is the analogue of block_truncate_page, but the page is
 * obtained through get_xip_page instead of the page cache.
 *
 * Returns 0 if @from lies on a block boundary, if the block is a hole
 * (-ENODATA) or after zeroing; -ENOMEM if no page is returned; otherwise
 * the error reported by get_xip_page.
 */
int
xip_truncate_page(struct address_space *mapping, loff_t from)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	unsigned partial;
	struct page *page;

	BUG_ON(!mapping->a_ops->get_xip_page);

	blocksize = 1 << mapping->host->i_blkbits;

	/* Bytes of the block that lie before 'from'; 0 on a block boundary. */
	partial = offset & (blocksize - 1);
	if (!partial)
		return 0;

	page = mapping->a_ops->get_xip_page(mapping,
					    index*(PAGE_SIZE/512), 0);
	if (!page)
		return -ENOMEM;
	if (unlikely(IS_ERR(page))) {
		long err = PTR_ERR(page);

		/* Hole? No need to truncate */
		return err == -ENODATA ? 0 : err;
	}

	/* Clear from 'from' up to the end of its block. */
	zero_user_page(page, offset, blocksize - partial, KM_USER0);
	return 0;
}
EXPORT_SYMBOL_GPL(xip_truncate_page);
Commit	Line	Data
ceffc078 CO	1	/*
	2	* linux/mm/filemap_xip.c
	3	*
	4	* Copyright (C) 2005 IBM Corporation
	5	* Author: Carsten Otte <cotte@de.ibm.com>
	6	*
	7	* derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
	8	*
	9	*/
	10
	11	#include <linux/fs.h>
	12	#include <linux/pagemap.h>
	13	#include <linux/module.h>
	14	#include <linux/uio.h>
	15	#include <linux/rmap.h>
e8edc6e0	16	#include <linux/sched.h>
ceffc078 CO	17	#include <asm/tlbflush.h>
	18	#include "filemap.h"
	19
a76c0b97 CO	20	/*
	21	* We do use our own empty page to avoid interference with other users
	22	* of ZERO_PAGE(), such as /dev/zero
	23	*/
	24	static struct page *__xip_sparse_page;
	25
	26	static struct page *xip_sparse_page(void)
	27	{
	28	if (!__xip_sparse_page) {
	29	unsigned long zeroes = get_zeroed_page(GFP_HIGHUSER);
	30	if (zeroes) {
	31	static DEFINE_SPINLOCK(xip_alloc_lock);
	32	spin_lock(&xip_alloc_lock);
	33	if (!__xip_sparse_page)
	34	__xip_sparse_page = virt_to_page(zeroes);
	35	else
	36	free_page(zeroes);
	37	spin_unlock(&xip_alloc_lock);
	38	}
	39	}
	40	return __xip_sparse_page;
	41	}
	42
ceffc078 CO	43	/*
	44	* This is a file read routine for execute in place files, and uses
	45	* the mapping->a_ops->get_xip_page() function for the actual low-level
	46	* stuff.
	47	*
	48	* Note the struct file* is not used at all. It may be NULL.
	49	*/
	50	static void
	51	do_xip_mapping_read(struct address_space *mapping,
	52	struct file_ra_state *_ra,
	53	struct file *filp,
	54	loff_t *ppos,
	55	read_descriptor_t *desc,
	56	read_actor_t actor)
	57	{
	58	struct inode *inode = mapping->host;
	59	unsigned long index, end_index, offset;
	60	loff_t isize;
	61
	62	BUG_ON(!mapping->a_ops->get_xip_page);
	63
	64	index = *ppos >> PAGE_CACHE_SHIFT;
	65	offset = *ppos & ~PAGE_CACHE_MASK;
	66
	67	isize = i_size_read(inode);
	68	if (!isize)
	69	goto out;
	70
	71	end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
	72	for (;;) {
	73	struct page *page;
	74	unsigned long nr, ret;
	75
	76	/* nr is the maximum number of bytes to copy from this page */
	77	nr = PAGE_CACHE_SIZE;
	78	if (index >= end_index) {
	79	if (index > end_index)
	80	goto out;
	81	nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
	82	if (nr <= offset) {
	83	goto out;
	84	}
	85	}
	86	nr = nr - offset;
	87
	88	page = mapping->a_ops->get_xip_page(mapping,
	89	index*(PAGE_SIZE/512), 0);
	90	if (!page)
	91	goto no_xip_page;
	92	if (unlikely(IS_ERR(page))) {
	93	if (PTR_ERR(page) == -ENODATA) {
	94	/* sparse */
afa597ba	95	page = ZERO_PAGE(0);
ceffc078 CO	96	} else {
	97	desc->error = PTR_ERR(page);
	98	goto out;
	99	}
afa597ba	100	}
ceffc078 CO	101
	102	/* If users can be writing to this page using arbitrary
	103	* virtual addresses, take care about potential aliasing
	104	* before reading the page on the kernel side.
	105	*/
	106	if (mapping_writably_mapped(mapping))
	107	flush_dcache_page(page);
	108
	109	/*
afa597ba	110	* Ok, we have the page, so now we can copy it to user space...
ceffc078 CO	111	*
	112	* The actor routine returns how many bytes were actually used..
	113	* NOTE! This may not be the same as how much of a user buffer
	114	* we filled up (we may be padding etc), so we can only update
	115	* "pos" here (the actor routine has to update the user buffer
	116	* pointers and the remaining count).
	117	*/
	118	ret = actor(desc, page, offset, nr);
	119	offset += ret;
	120	index += offset >> PAGE_CACHE_SHIFT;
	121	offset &= ~PAGE_CACHE_MASK;
	122
	123	if (ret == nr && desc->count)
	124	continue;
	125	goto out;
	126
	127	no_xip_page:
	128	/* Did not get the page. Report it */
	129	desc->error = -EIO;
	130	goto out;
	131	}
	132
	133	out:
	134	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
	135	if (filp)
	136	file_accessed(filp);
	137	}
	138
ceffc078	139	ssize_t
eb6fe0c3	140	xip_file_read(struct file filp, char __user buf, size_t len, loff_t *ppos)
ceffc078	141	{
eb6fe0c3	142	read_descriptor_t desc;
ceffc078	143
eb6fe0c3 CO	144	if (!access_ok(VERIFY_WRITE, buf, len))
eb6fe0c3 CO	145	return -EFAULT;
ceffc078	146
eb6fe0c3 CO	147	desc.written = 0;
	148	desc.arg.buf = buf;
	149	desc.count = len;
	150	desc.error = 0;
ceffc078	151
eb6fe0c3 CO	152	do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
	153	ppos, &desc, file_read_actor);
	154
	155	if (desc.written)
	156	return desc.written;
	157	else
	158	return desc.error;
ceffc078	159	}
eb6fe0c3	160	EXPORT_SYMBOL_GPL(xip_file_read);
ceffc078	161
ceffc078 CO	162	/*
	163	* __xip_unmap is invoked from xip_unmap and
	164	* xip_write
	165	*
	166	* This function walks all vmas of the address_space and unmaps the
a76c0b97	167	* __xip_sparse_page when found at pgoff.
ceffc078 CO	168	*/
	169	static void
	170	__xip_unmap (struct address_space * mapping,
	171	unsigned long pgoff)
	172	{
	173	struct vm_area_struct *vma;
	174	struct mm_struct *mm;
	175	struct prio_tree_iter iter;
	176	unsigned long address;
	177	pte_t *pte;
	178	pte_t pteval;
c0718806	179	spinlock_t *ptl;
67b02f11	180	struct page *page;
ceffc078	181
a76c0b97 CO	182	page = __xip_sparse_page;
	183	if (!page)
	184	return;
	185
ceffc078 CO	186	spin_lock(&mapping->i_mmap_lock);
	187	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
	188	mm = vma->vm_mm;
	189	address = vma->vm_start +
	190	((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
	191	BUG_ON(address < vma->vm_start \|\| address >= vma->vm_end);
c0718806 HD	192	pte = page_check_address(page, mm, address, &ptl);
c0718806 HD	193	if (pte) {
ceffc078	194	/* Nuke the page table entry. */
082ff0a9	195	flush_cache_page(vma, address, pte_pfn(*pte));
ceffc078	196	pteval = ptep_clear_flush(vma, address, pte);
7de6b805	197	page_remove_rmap(page, vma);
b5810039	198	dec_mm_counter(mm, file_rss);
ceffc078	199	BUG_ON(pte_dirty(pteval));
c0718806	200	pte_unmap_unlock(pte, ptl);
b5810039	201	page_cache_release(page);
ceffc078 CO	202	}
	203	}
	204	spin_unlock(&mapping->i_mmap_lock);
	205	}
	206
	207	/*
	208	* xip_nopage() is invoked via the vma operations vector for a
	209	* mapped memory region to read in file data during a page fault.
	210	*
	211	* This function is derived from filemap_nopage, but used for execute in place
	212	*/
	213	static struct page *
	214	xip_file_nopage(struct vm_area_struct * area,
	215	unsigned long address,
	216	int *type)
	217	{
	218	struct file *file = area->vm_file;
	219	struct address_space *mapping = file->f_mapping;
	220	struct inode *inode = mapping->host;
	221	struct page *page;
	222	unsigned long size, pgoff, endoff;
	223
	224	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
	225	+ area->vm_pgoff;
	226	endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT)
	227	+ area->vm_pgoff;
	228
	229	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
a76c0b97 CO	230	if (pgoff >= size)
a76c0b97 CO	231	return NOPAGE_SIGBUS;
ceffc078 CO	232
ceffc078 CO	233	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
a76c0b97	234	if (!IS_ERR(page))
b5810039	235	goto out;
ceffc078	236	if (PTR_ERR(page) != -ENODATA)
a76c0b97	237	return NOPAGE_SIGBUS;
ceffc078 CO	238
	239	/* sparse block */
	240	if ((area->vm_flags & (VM_WRITE \| VM_MAYWRITE)) &&
	241	(area->vm_flags & (VM_SHARED\| VM_MAYSHARE)) &&
	242	(!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
	243	/* maybe shared writable, allocate new block */
	244	page = mapping->a_ops->get_xip_page (mapping,
	245	pgoff*(PAGE_SIZE/512), 1);
	246	if (IS_ERR(page))
a76c0b97	247	return NOPAGE_SIGBUS;
ceffc078 CO	248	/* unmap page at pgoff from all other vmas */
	249	__xip_unmap(mapping, pgoff);
	250	} else {
a76c0b97 CO	251	/* not shared and writable, use xip_sparse_page() */
	252	page = xip_sparse_page();
	253	if (!page)
	254	return NOPAGE_OOM;
ceffc078 CO	255	}
ceffc078 CO	256
b5810039 NP	257	out:
b5810039 NP	258	page_cache_get(page);
ceffc078 CO	259	return page;
	260	}
	261
	262	static struct vm_operations_struct xip_file_vm_ops = {
	263	.nopage = xip_file_nopage,
	264	};
	265
	266	int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
	267	{
	268	BUG_ON(!file->f_mapping->a_ops->get_xip_page);
	269
	270	file_accessed(file);
	271	vma->vm_ops = &xip_file_vm_ops;
	272	return 0;
	273	}
	274	EXPORT_SYMBOL_GPL(xip_file_mmap);
	275
	276	static ssize_t
eb6fe0c3 CO	277	__xip_file_write(struct file filp, const char __user buf,
eb6fe0c3 CO	278	size_t count, loff_t pos, loff_t *ppos)
ceffc078	279	{
eb6fe0c3	280	struct address_space * mapping = filp->f_mapping;
f5e54d6e	281	const struct address_space_operations *a_ops = mapping->a_ops;
ceffc078 CO	282	struct inode *inode = mapping->host;
	283	long status = 0;
	284	struct page *page;
	285	size_t bytes;
ceffc078 CO	286	ssize_t written = 0;
	287
	288	BUG_ON(!mapping->a_ops->get_xip_page);
	289
ceffc078 CO	290	do {
	291	unsigned long index;
	292	unsigned long offset;
	293	size_t copied;
	294
	295	offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
	296	index = pos >> PAGE_CACHE_SHIFT;
	297	bytes = PAGE_CACHE_SIZE - offset;
	298	if (bytes > count)
	299	bytes = count;
	300
	301	/*
	302	* Bring in the user page that we will copy from _first_.
	303	* Otherwise there's a nasty deadlock on copying from the
	304	* same page as we're writing to, without it being marked
	305	* up-to-date.
	306	*/
	307	fault_in_pages_readable(buf, bytes);
	308
	309	page = a_ops->get_xip_page(mapping,
eb6fe0c3	310	index*(PAGE_SIZE/512), 0);
ceffc078 CO	311	if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) {
	312	/* we allocate a new page unmap it */
	313	page = a_ops->get_xip_page(mapping,
eb6fe0c3	314	index*(PAGE_SIZE/512), 1);
ceffc078	315	if (!IS_ERR(page))
eb6fe0c3 CO	316	/* unmap page at pgoff from all other vmas */
eb6fe0c3 CO	317	__xip_unmap(mapping, index);
ceffc078 CO	318	}
	319
	320	if (IS_ERR(page)) {
	321	status = PTR_ERR(page);
	322	break;
	323	}
	324
eb6fe0c3	325	copied = filemap_copy_from_user(page, offset, buf, bytes);
ceffc078 CO	326	flush_dcache_page(page);
	327	if (likely(copied > 0)) {
	328	status = copied;
	329
	330	if (status >= 0) {
	331	written += status;
	332	count -= status;
	333	pos += status;
	334	buf += status;
ceffc078 CO	335	}
	336	}
	337	if (unlikely(copied != bytes))
	338	if (status >= 0)
	339	status = -EFAULT;
	340	if (status < 0)
	341	break;
	342	} while (count);
	343	*ppos = pos;
	344	/*
	345	* No need to use i_size_read() here, the i_size
1b1dcc1b	346	* cannot change under us because we hold i_mutex.
ceffc078 CO	347	*/
	348	if (pos > inode->i_size) {
	349	i_size_write(inode, pos);
	350	mark_inode_dirty(inode);
	351	}
	352
	353	return written ? written : status;
	354	}
	355
eb6fe0c3 CO	356	ssize_t
	357	xip_file_write(struct file filp, const char __user buf, size_t len,
	358	loff_t *ppos)
ceffc078	359	{
eb6fe0c3 CO	360	struct address_space *mapping = filp->f_mapping;
	361	struct inode *inode = mapping->host;
	362	size_t count;
	363	loff_t pos;
	364	ssize_t ret;
ceffc078	365
1b1dcc1b	366	mutex_lock(&inode->i_mutex);
ceffc078	367
eb6fe0c3 CO	368	if (!access_ok(VERIFY_READ, buf, len)) {
	369	ret=-EFAULT;
	370	goto out_up;
ceffc078 CO	371	}
ceffc078 CO	372
ceffc078	373	pos = *ppos;
eb6fe0c3	374	count = len;
ceffc078 CO	375
	376	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
	377
eb6fe0c3 CO	378	/* We can write back this queue in page reclaim */
eb6fe0c3 CO	379	current->backing_dev_info = mapping->backing_dev_info;
ceffc078	380
eb6fe0c3 CO	381	ret = generic_write_checks(filp, &pos, &count, S_ISBLK(inode->i_mode));
	382	if (ret)
	383	goto out_backing;
ceffc078	384	if (count == 0)
eb6fe0c3	385	goto out_backing;
ceffc078	386
d3ac7f89	387	ret = remove_suid(filp->f_path.dentry);
eb6fe0c3 CO	388	if (ret)
eb6fe0c3 CO	389	goto out_backing;
ceffc078	390
870f4817	391	file_update_time(filp);
ceffc078	392
eb6fe0c3	393	ret = __xip_file_write (filp, buf, count, pos, ppos);
ceffc078	394
eb6fe0c3 CO	395	out_backing:
	396	current->backing_dev_info = NULL;
	397	out_up:
1b1dcc1b	398	mutex_unlock(&inode->i_mutex);
ceffc078 CO	399	return ret;
ceffc078 CO	400	}
eb6fe0c3	401	EXPORT_SYMBOL_GPL(xip_file_write);
ceffc078 CO	402
	403	/*
	404	* truncate a page used for execute in place
	405	* functionality is analog to block_truncate_page but does use get_xip_page
	406	* to get the page instead of page cache
	407	*/
	408	int
	409	xip_truncate_page(struct address_space *mapping, loff_t from)
	410	{
	411	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	412	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	413	unsigned blocksize;
	414	unsigned length;
	415	struct page *page;
ceffc078 CO	416
	417	BUG_ON(!mapping->a_ops->get_xip_page);
	418
	419	blocksize = 1 << mapping->host->i_blkbits;
	420	length = offset & (blocksize - 1);
	421
	422	/* Block boundary? Nothing to do */
	423	if (!length)
	424	return 0;
	425
	426	length = blocksize - length;
	427
	428	page = mapping->a_ops->get_xip_page(mapping,
	429	index*(PAGE_SIZE/512), 0);
ceffc078	430	if (!page)
eb6fe0c3	431	return -ENOMEM;
ceffc078	432	if (unlikely(IS_ERR(page))) {
eb6fe0c3	433	if (PTR_ERR(page) == -ENODATA)
ceffc078 CO	434	/* Hole? No need to truncate */
ceffc078 CO	435	return 0;
eb6fe0c3 CO	436	else
eb6fe0c3 CO	437	return PTR_ERR(page);
afa597ba	438	}
01f2705d	439	zero_user_page(page, offset, length, KM_USER0);
eb6fe0c3	440	return 0;
ceffc078 CO	441	}
ceffc078 CO	442	EXPORT_SYMBOL_GPL(xip_truncate_page);