[net-next-2.6.git] / arch / powerpc / oprofile / cell / spu_task_sync.c

/*
 * Cell Broadband Engine OProfile Support
 *
 * (C) Copyright IBM Corporation 2006
 *
 * Author: Maynard Johnson <maynardj@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

/* The purpose of this file is to handle SPU event task switching
 * and to record SPU context information into the OProfile
 * event buffer.
 *
 * Additionally, the spu_sync_buffer function is provided as a helper
 * for recording actual SPU program counter samples to the event buffer.
 */
#include <linux/dcookies.h>
#include <linux/kref.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/numa.h>
#include <linux/oprofile.h>
#include <linux/spinlock.h>
#include "pr_util.h"

/* Sentinel index understood by release_cached_info(): release every
 * cached entry instead of a single one.
 */
#define RELEASE_ALL 9999

/* Taken around accesses to the per-SPU circular buffers (head/tail). */
static DEFINE_SPINLOCK(buffer_lock);
/* Taken around accesses to the spu_info[] cache. */
static DEFINE_SPINLOCK(cache_lock);
/* Upper bound (exclusive) for SPU indices; set in spu_sync_start(). */
static int num_spu_nodes;
/* Result of number_of_online_nodes(); set in spu_sync_start(). */
int spu_prof_num_nodes;

/* Per-SPU circular buffers, indexed by SPU number. */
struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
/* Delayed work item whose handler is wq_sync_spu_buff(). */
struct delayed_work spu_work;
/* Entry count of each per-SPU buffer; set in oprofile_spu_buff_create(). */
static unsigned max_spu_buff;

static void spu_buff_add(unsigned long int value, int spu)
{
	/* spu buff is a circular buffer.  Add entries to the
	 * head.  Head is the index to store the next value.
	 * The buffer is full when there is one available entry
	 * in the queue, i.e. head and tail can't be equal.
	 * That way we can tell the difference between the
	 * buffer being full versus empty.
	 *
	 *  ASSUPTION: the buffer_lock is held when this function
	 *             is called to lock the buffer, head and tail.
	 */
	int full = 1;

	if (spu_buff[spu].head >= spu_buff[spu].tail) {
		if ((spu_buff[spu].head - spu_buff[spu].tail)
		    <  (max_spu_buff - 1))
			full = 0;

	} else if (spu_buff[spu].tail > spu_buff[spu].head) {
		if ((spu_buff[spu].tail - spu_buff[spu].head)
		    > 1)
			full = 0;
	}

	if (!full) {
		spu_buff[spu].buff[spu_buff[spu].head] = value;
		spu_buff[spu].head++;

		if (spu_buff[spu].head >= max_spu_buff)
			spu_buff[spu].head = 0;
	} else {
		/* From the user's perspective make the SPU buffer
		 * size management/overflow look like we are using
		 * per cpu buffers.  The user uses the same
		 * per cpu parameter to adjust the SPU buffer size.
		 * Increment the sample_lost_overflow to inform
		 * the user the buffer size needs to be increased.
		 */
		oprofile_cpu_buffer_inc_smpl_lost();
	}
}

/* This function copies the per SPU buffers to the
 * OProfile kernel buffer.
 */
void sync_spu_buff(void)
{
	int spu;
	unsigned long flags;
	int curr_head;

	for (spu = 0; spu < num_spu_nodes; spu++) {
		/* In case there was an issue and the buffer didn't
		 * get created skip it.
		 */
		if (spu_buff[spu].buff == NULL)
			continue;

		/* Hold the lock to make sure the head/tail
		 * doesn't change while spu_buff_add() is
		 * deciding if the buffer is full or not.
		 * Being a little paranoid.
		 */
		spin_lock_irqsave(&buffer_lock, flags);
		curr_head = spu_buff[spu].head;
		spin_unlock_irqrestore(&buffer_lock, flags);

		/* Transfer the current contents to the kernel buffer.
		 * data can still be added to the head of the buffer.
		 */
		oprofile_put_buff(spu_buff[spu].buff,
				  spu_buff[spu].tail,
				  curr_head, max_spu_buff);

		spin_lock_irqsave(&buffer_lock, flags);
		spu_buff[spu].tail = curr_head;
		spin_unlock_irqrestore(&buffer_lock, flags);
	}

}

/* Work handler: flush the per-SPU buffers and, while profiling is
 * still running, queue itself again after DEFAULT_TIMER_EXPIRE.
 */
static void wq_sync_spu_buff(struct work_struct *work)
{
	/* push the SPU buffer contents to the kernel buffer */
	sync_spu_buff();

	/* profiling finished: do not re-arm */
	if (!spu_prof_running)
		return;

	schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
}

/* Container for caching information about an active SPU task.
 * Instances are reference counted through cache_ref and are freed by
 * destroy_cached_info() when the last reference is dropped.
 */
struct cached_info {
	/* vma map built by create_vma_map(); used by vma_map_lookup() */
	struct vma_to_fileoffset_map *map;
	struct spu *the_spu;	/* needed to access pointer to local_store */
	/* reference count; one reference is handed to SPUFS */
	struct kref cache_ref;
};

/* Cache of cached_info pointers indexed by SPU number; a NULL slot
 * means nothing is cached for that SPU.
 */
static struct cached_info *spu_info[MAX_NUMNODES * 8];

/* kref release callback for a cached_info: frees the vma map and the
 * container itself, then drops the module reference that was taken
 * when the kref was handed to SPUFS.
 */
static void destroy_cached_info(struct kref *kref)
{
	struct cached_info *victim =
		container_of(kref, struct cached_info, cache_ref);

	vma_map_free(victim->map);
	kfree(victim);
	module_put(THIS_MODULE);
}

/* Return the cached_info for the passed SPU number, or NULL if the
 * index is out of range or nothing is cached.
 *
 * If the slot is empty and the_spu is non-NULL, the kref stored on
 * the_spu->ctx (if any) is used to recover the cached_info; the slot
 * is then filled and an extra reference is taken on it.
 *
 * ATTENTION:  Callers are responsible for obtaining the
 *	       cache_lock if needed prior to invoking this function.
 */
static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
{
	struct kref *ref;

	/* spu_num is signed: reject negative values as well as values
	 * past the end so spu_info[] is never indexed out of bounds.
	 */
	if (spu_num < 0 || spu_num >= num_spu_nodes) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: Invalid index %d into spu info cache\n",
		       __func__, __LINE__, spu_num);
		return NULL;
	}

	if (!spu_info[spu_num] && the_spu) {
		ref = spu_get_profile_private_kref(the_spu->ctx);
		if (ref) {
			spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
			kref_get(&spu_info[spu_num]->cache_ref);
		}
	}

	return spu_info[spu_num];
}


/* Looks for cached info for the passed spu.  If not found, the
 * cached info is created for the passed spu, stored in
 * spu_info[spu->number] and handed to SPUFS through
 * spu_set_profile_private_kref().
 *
 * Returns 0 for success; otherwise -ENOMEM when either the
 * cached_info or its vma map cannot be allocated.
 */
static int
prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
{
	unsigned long flags;
	struct vma_to_fileoffset_map *new_map;
	int retval = 0;
	struct cached_info *info;

	/* We won't bother getting cache_lock here since we
	 * don't do anything with the cached_info that's returned.
	 *
	 * NOTE(review): get_cached_info() also returns NULL for an
	 * out-of-range index; that case is not told apart from "not
	 * cached" below -- confirm spu->number is always in range.
	 */
	info = get_cached_info(spu, spu->number);

	if (info) {
		pr_debug("Found cached SPU info.\n");
		goto out;
	}

	/* Create cached_info and set spu_info[spu->number] to point to it.
	 * spu->number is a system-wide value, not a per-node value.
	 */
	info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
	if (!info) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: cached_info allocation failed\n",
		       __func__, __LINE__);
		retval = -ENOMEM;
		goto err_alloc;
	}
	new_map = create_vma_map(spu, objectId);
	if (!new_map) {
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: create vma_map failed\n",
		       __func__, __LINE__);
		retval = -ENOMEM;
		goto err_alloc;
	}

	pr_debug("Created vma_map\n");
	info->map = new_map;
	info->the_spu = spu;
	kref_init(&info->cache_ref);
	spin_lock_irqsave(&cache_lock, flags);
	spu_info[spu->number] = info;
	/* Increment count before passing off ref to SPUFS. */
	kref_get(&info->cache_ref);

	/* We increment the module refcount here since SPUFS is
	 * responsible for the final destruction of the cached_info,
	 * and it must be able to access the destroy_cached_info()
	 * function defined in the OProfile module.  We decrement
	 * the module refcount in destroy_cached_info.
	 */
	try_module_get(THIS_MODULE);
	spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
				destroy_cached_info);
	spin_unlock_irqrestore(&cache_lock, flags);
	goto out;

err_alloc:
	/* info is NULL when its own allocation failed; kfree(NULL) is a no-op */
	kfree(info);
out:
	return retval;
}

/*
 * Drop this file's reference on the cached_info of one SPU, or of
 * every SPU when spu_index is RELEASE_ALL, and clear the matching
 * spu_info[] slot(s).  An out-of-range index is logged and ignored.
 * Always returns 0.
 *
 * NOTE:  The caller is responsible for locking the
 *	  cache_lock prior to calling this function.
 */
static int release_cached_info(int spu_index)
{
	int index, end;

	if (spu_index == RELEASE_ALL) {
		index = 0;
		end = num_spu_nodes;
	} else if (spu_index < 0 || spu_index >= num_spu_nodes) {
		/* spu_index is signed: a negative value must be rejected
		 * too, otherwise spu_info[] is indexed before its start.
		 */
		printk(KERN_ERR "SPU_PROF: "
			"%s, line %d: "
			"Invalid index %d into spu info cache\n",
			__func__, __LINE__, spu_index);
		return 0;
	} else {
		index = spu_index;
		end = spu_index + 1;
	}

	for (; index < end; index++) {
		if (spu_info[index]) {
			kref_put(&spu_info[index]->cache_ref,
				 destroy_cached_info);
			spu_info[index] = NULL;
		}
	}

	return 0;
}

/* The source code for fast_get_dcookie was "borrowed"
 * from drivers/oprofile/buffer_sync.c.
 */

/* Optimisation. We can manage without taking the dcookie sem
 * because we cannot reach this code without at least one
 * dcookie user still being registered (namely, the reader
 * of the event buffer).
 *
 * Returns the dcookie for 'path'.  When the dentry is already flagged
 * DCACHE_COOKIE the dentry address itself is the cookie; otherwise
 * get_dcookie() is asked for one.  0 is returned if get_dcookie()
 * leaves the cookie unset.
 */
static inline unsigned long fast_get_dcookie(struct path *path)
{
	/* Start from 0 so that a get_dcookie() call that does not store
	 * a cookie yields the "no cookie" value callers test for instead
	 * of an uninitialized stack value.
	 */
	unsigned long cookie = 0;

	if (path->dentry->d_flags & DCACHE_COOKIE)
		return (unsigned long)path->dentry;
	get_dcookie(path, &cookie);
	return cookie;
}

/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
 * which corresponds loosely to "application name". Also, determine
 * the offset for the SPU ELF object.  If computed offset is
 * non-zero, it implies an embedded SPU object; otherwise, it's a
 * separate SPU binary, in which case we retrieve its dcookie.
 * For the embedded case, we must determine if SPU ELF is embedded
 * in the executable application or another file (i.e., shared lib).
 * If embedded in a shared lib, we must get the dcookie and return
 * that to the caller.
 *
 * @spu:             SPU whose mm is searched
 * @offsetp:         out: offset of spu_ref within its mapping
 * @spu_bin_dcookie: out: dcookie of the file backing that mapping
 * @spu_ref:         address (object-id) of the SPU ELF object
 *
 * Returns the application dcookie, or 0 if spu->mm is NULL or no
 * file-backed VM_EXECUTABLE mapping exists.  If no file-backed mapping
 * contains spu_ref, an error is logged and *offsetp and
 * *spu_bin_dcookie are left untouched.
 */
static unsigned long
get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
			    unsigned long *spu_bin_dcookie,
			    unsigned long spu_ref)
{
	unsigned long app_cookie = 0;
	unsigned int my_offset = 0;
	struct vm_area_struct *vma;
	struct mm_struct *mm = spu->mm;

	if (!mm)
		goto out;

	down_read(&mm->mmap_sem);

	/* first file-backed executable mapping == the application */
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (!vma->vm_file)
			continue;
		if (!(vma->vm_flags & VM_EXECUTABLE))
			continue;
		app_cookie = fast_get_dcookie(&vma->vm_file->f_path);
		pr_debug("got dcookie for %s\n",
			 vma->vm_file->f_dentry->d_name.name);
		break;
	}

	/* mapping that contains the SPU ELF object */
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
			continue;
		my_offset = spu_ref - vma->vm_start;
		if (!vma->vm_file)
			goto fail_no_image_cookie;

		pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
			 my_offset, spu_ref,
			 vma->vm_file->f_dentry->d_name.name);
		*offsetp = my_offset;
		break;
	}

	/* The loop ran off the end of the list: no mapping contains
	 * spu_ref, so there is no vma to take a dcookie from.
	 */
	if (!vma)
		goto fail_no_image_cookie;

	*spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path);
	pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);

	up_read(&mm->mmap_sem);

out:
	return app_cookie;

fail_no_image_cookie:
	up_read(&mm->mmap_sem);

	printk(KERN_ERR "SPU_PROF: "
		"%s, line %d: Cannot find dcookie for SPU binary\n",
		__func__, __LINE__);
	goto out;
}


/* Find or create the cached context information for the passed SPU,
 * then append an SPU context-switch record to that SPU's buffer.
 * Returns 0 on success, the error from prepare_cached_spu_info(), or
 * -ENOENT when either dcookie could not be obtained.
 */
static int process_context_switch(struct spu *spu, unsigned long objectId)
{
	unsigned long irq_flags;
	unsigned long spu_cookie = 0;
	unsigned long app_dcookie;
	unsigned int offset = 0;
	unsigned int i;
	int rc;

	rc = prepare_cached_spu_info(spu, objectId);
	if (rc)
		return rc;

	/* The dcookie lookup takes a mutex_lock on its code path, so it
	 * has to happen before interrupts are disabled below.
	 */
	app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie,
						  objectId);
	if (!app_dcookie || !spu_cookie)
		return -ENOENT;

	/* Emit the context record into the SPU's buffer */
	spin_lock_irqsave(&buffer_lock, irq_flags);
	{
		const unsigned long record[] = {
			ESCAPE_CODE,
			SPU_CTX_SWITCH_CODE,
			spu->number,
			spu->pid,
			spu->tgid,
			app_dcookie,
			spu_cookie,
			offset,
		};

		for (i = 0; i < sizeof(record) / sizeof(record[0]); i++)
			spu_buff_add(record[i], spu->number);
	}

	/* From now on SPU PC samples may be written out.  Samples that
	 * show up before a context record make postprocessing fail.
	 */
	spu_buff[spu->number].ctx_sw_seen = 1;

	spin_unlock_irqrestore(&buffer_lock, irq_flags);
	smp_wmb();	/* insure spu event buffer updates are written */
			/* don't want entries intermingled... */
	return rc;
}

/*
 * Notifier callback for bind_context / unbind_context events.
 * val is 0 for an unbind_context; otherwise it carries the object-id
 * of the spu context being bound.  data points to the 'struct spu'.
 */
static int spu_active_notify(struct notifier_block *self, unsigned long val,
				void *data)
{
	struct spu *the_spu = data;
	unsigned long flags;
	int retval;

	pr_debug("SPU event notification arrived\n");

	/* bind: record the context switch */
	if (val)
		return process_context_switch(the_spu, val);

	/* unbind: drop the cached info for this SPU */
	spin_lock_irqsave(&cache_lock, flags);
	retval = release_cached_info(the_spu->number);
	spin_unlock_irqrestore(&cache_lock, flags);

	return retval;
}

/* Notifier block handed to spu_switch_event_register() and
 * spu_switch_event_unregister(); events are delivered to
 * spu_active_notify().
 */
static struct notifier_block spu_active = {
	.notifier_call = spu_active_notify,
};

/* Walk the online cpus and derive a node count from
 * cbe_cpu_to_node().  The count is bumped by one each time a cpu's
 * (node id + 1) is larger than the count so far.
 * NOTE(review): this is not strictly max(node id) + 1 when node ids
 * are visited out of order or are sparse -- confirm that is intended.
 */
static int number_of_online_nodes(void)
{
	u32 cpu;
	u32 node_plus_one;
	int nodes = 0;

	for_each_online_cpu(cpu) {
		node_plus_one = cbe_cpu_to_node(cpu) + 1;
		if (node_plus_one > nodes)
			nodes++;
	}

	return nodes;
}

static int oprofile_spu_buff_create(void)
{
	int spu;

	max_spu_buff = oprofile_get_cpu_buffer_size();

	for (spu = 0; spu < num_spu_nodes; spu++) {
		/* create circular buffers to store the data in.
		 * use locks to manage accessing the buffers
		 */
		spu_buff[spu].head = 0;
		spu_buff[spu].tail = 0;

		/*
		 * Create a buffer for each SPU.  Can't reliably
		 * create a single buffer for all spus due to not
		 * enough contiguous kernel memory.
		 */

		spu_buff[spu].buff = kzalloc((max_spu_buff
					      * sizeof(unsigned long)),
					     GFP_KERNEL);

		if (!spu_buff[spu].buff) {
			printk(KERN_ERR "SPU_PROF: "
			       "%s, line %d:  oprofile_spu_buff_create "
		       "failed to allocate spu buffer %d.\n",
			       __func__, __LINE__, spu);

			/* release the spu buffers that have been allocated */
			while (spu >= 0) {
				kfree(spu_buff[spu].buff);
				spu_buff[spu].buff = 0;
				spu--;
			}
			return -ENOMEM;
		}
	}
	return 0;
}

/* The main purpose of this function is to synchronize
 * OProfile with SPUFS by registering to be notified of
 * SPU task switches.
 *
 * NOTE: When profiling SPUs, we must ensure that only
 * spu_sync_start is invoked and not the generic sync_start
 * in drivers/oprofile/oprof.c.	 A return value of
 * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
 * accomplish this.
 *
 * Returns SKIP_GENERIC_SYNC on success, SYNC_START_ERROR if the
 * SPU switch notifier cannot be registered, or the error code from
 * oprofile_spu_buff_create() if the per-SPU buffers cannot be
 * allocated.
 */
int spu_sync_start(void)
{
	int spu;
	int ret;
	unsigned long flags = 0;

	spu_prof_num_nodes = number_of_online_nodes();
	num_spu_nodes = spu_prof_num_nodes * 8;
	INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff);

	/* create buffer for storing the SPU data to put in
	 * the kernel buffer.
	 */
	ret = oprofile_spu_buff_create();
	if (ret)
		goto out;

	/* Buffer creation returned 0.  Set the documented success value
	 * explicitly rather than relying on SKIP_GENERIC_SYNC being 0.
	 */
	ret = SKIP_GENERIC_SYNC;

	spin_lock_irqsave(&buffer_lock, flags);
	for (spu = 0; spu < num_spu_nodes; spu++) {
		spu_buff_add(ESCAPE_CODE, spu);
		spu_buff_add(SPU_PROFILING_CODE, spu);
		spu_buff_add(num_spu_nodes, spu);
	}
	spin_unlock_irqrestore(&buffer_lock, flags);

	for (spu = 0; spu < num_spu_nodes; spu++) {
		spu_buff[spu].ctx_sw_seen = 0;
		spu_buff[spu].last_guard_val = 0;
	}

	/* Register for SPU events  */
	if (spu_switch_event_register(&spu_active)) {
		/* NOTE(review): the per-SPU buffers stay allocated on this
		 * path -- confirm the caller runs spu_sync_stop() after a
		 * failed start.
		 */
		ret = SYNC_START_ERROR;
		goto out;
	}

	pr_debug("spu_sync_start -- running.\n");
out:
	return ret;
}

/* Translate SPU program counter samples to file offsets and append
 * them, tagged with the SPU number in the upper 32 bits, to that SPU's
 * buffer.  Samples are dropped when no cached info exists for the SPU.
 */
void spu_sync_buffer(int spu_num, unsigned int *samples,
		     int num_samples)
{
	unsigned long long spu_tag = ((unsigned long long)spu_num) << 32;
	unsigned long long file_offset;
	unsigned long flags;
	struct cached_info *c_info;
	struct vma_to_fileoffset_map *map;
	struct spu *the_spu;
	int i;

	/* cache_lock is held for the whole operation: once we have the
	 * cached_info, the SPU job it belongs to could otherwise end and
	 * cause the cached_info to be destroyed underneath us.
	 */
	spin_lock_irqsave(&cache_lock, flags);
	c_info = get_cached_info(NULL, spu_num);
	if (c_info) {
		map = c_info->map;
		the_spu = c_info->the_spu;

		spin_lock(&buffer_lock);
		for (i = 0; i < num_samples; i++) {
			unsigned int sample = samples[i];
			int grd_val = 0;

			if (!sample)
				continue;

			file_offset = vma_map_lookup(map, sample, the_spu,
						     &grd_val);

			/* A non-zero guard value identifies the overlay
			 * section in use.  When it changes, an overlay
			 * switch happened during sampling: remember the
			 * new value and drop the rest of the samples.
			 */
			if (grd_val &&
			    grd_val != spu_buff[spu_num].last_guard_val) {
				spu_buff[spu_num].last_guard_val = grd_val;
				break;
			}

			/* Samples may only follow the SPU context switch
			 * record; without it postprocessing has no
			 * anonymous map information for the SPU PC.
			 */
			if (spu_buff[spu_num].ctx_sw_seen)
				spu_buff_add((file_offset | spu_tag),
					     spu_num);
		}
		spin_unlock(&buffer_lock);
	} else {
		/* Legitimate when the SPU task ends before all samples
		 * are recorded; a few samples are simply dropped.
		 */
		pr_debug("SPU_PROF: No cached SPU contex "
			  "for SPU #%d. Dropping samples.\n", spu_num);
	}
	spin_unlock_irqrestore(&cache_lock, flags);
}


int spu_sync_stop(void)
{
	unsigned long flags = 0;
	int ret;
	int k;

	ret = spu_switch_event_unregister(&spu_active);

	if (ret)
		printk(KERN_ERR "SPU_PROF: "
		       "%s, line %d: spu_switch_event_unregister "	\
		       "returned %d\n",
		       __func__, __LINE__, ret);

	/* flush any remaining data in the per SPU buffers */
	sync_spu_buff();

	spin_lock_irqsave(&cache_lock, flags);
	ret = release_cached_info(RELEASE_ALL);
	spin_unlock_irqrestore(&cache_lock, flags);

	/* remove scheduled work queue item rather then waiting
	 * for every queued entry to execute.  Then flush pending
	 * system wide buffer to event buffer.
	 */
	cancel_delayed_work(&spu_work);

	for (k = 0; k < num_spu_nodes; k++) {
		spu_buff[k].ctx_sw_seen = 0;

		/*
		 * spu_sys_buff will be null if there was a problem
		 * allocating the buffer.  Only delete if it exists.
		 */
		kfree(spu_buff[k].buff);
		spu_buff[k].buff = 0;
	}
	pr_debug("spu_sync_stop -- done.\n");
	return ret;
}
Commit	Line	Data
1474855d BN	1	/*
	2	* Cell Broadband Engine OProfile Support
	3	*
	4	* (C) Copyright IBM Corporation 2006
	5	*
	6	* Author: Maynard Johnson <maynardj@us.ibm.com>
	7	*
	8	* This program is free software; you can redistribute it and/or
	9	* modify it under the terms of the GNU General Public License
	10	* as published by the Free Software Foundation; either version
	11	* 2 of the License, or (at your option) any later version.
	12	*/
	13
	14	/* The purpose of this file is to handle SPU event task switching
	15	* and to record SPU context information into the OProfile
	16	* event buffer.
	17	*
	18	* Additionally, the spu_sync_buffer function is provided as a helper
	19	* for recoding actual SPU program counter samples to the event buffer.
	20	*/
	21	#include <linux/dcookies.h>
	22	#include <linux/kref.h>
	23	#include <linux/mm.h>
4e950f6f	24	#include <linux/fs.h>
1474855d BN	25	#include <linux/module.h>
	26	#include <linux/notifier.h>
	27	#include <linux/numa.h>
	28	#include <linux/oprofile.h>
	29	#include <linux/spinlock.h>
	30	#include "pr_util.h"
	31
	32	#define RELEASE_ALL 9999
	33
	34	static DEFINE_SPINLOCK(buffer_lock);
	35	static DEFINE_SPINLOCK(cache_lock);
	36	static int num_spu_nodes;
	37	int spu_prof_num_nodes;
a5598ca0 CL	38
	39	struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
	40	struct delayed_work spu_work;
	41	static unsigned max_spu_buff;
	42
	43	static void spu_buff_add(unsigned long int value, int spu)
	44	{
	45	/* spu buff is a circular buffer. Add entries to the
	46	* head. Head is the index to store the next value.
	47	* The buffer is full when there is one available entry
	48	* in the queue, i.e. head and tail can't be equal.
	49	* That way we can tell the difference between the
	50	* buffer being full versus empty.
	51	*
	52	* ASSUPTION: the buffer_lock is held when this function
	53	* is called to lock the buffer, head and tail.
	54	*/
	55	int full = 1;
	56
	57	if (spu_buff[spu].head >= spu_buff[spu].tail) {
	58	if ((spu_buff[spu].head - spu_buff[spu].tail)
	59	< (max_spu_buff - 1))
	60	full = 0;
	61
	62	} else if (spu_buff[spu].tail > spu_buff[spu].head) {
	63	if ((spu_buff[spu].tail - spu_buff[spu].head)
	64	> 1)
	65	full = 0;
	66	}
	67
	68	if (!full) {
	69	spu_buff[spu].buff[spu_buff[spu].head] = value;
	70	spu_buff[spu].head++;
	71
	72	if (spu_buff[spu].head >= max_spu_buff)
	73	spu_buff[spu].head = 0;
	74	} else {
	75	/* From the user's perspective make the SPU buffer
	76	* size management/overflow look like we are using
	77	* per cpu buffers. The user uses the same
	78	* per cpu parameter to adjust the SPU buffer size.
	79	* Increment the sample_lost_overflow to inform
	80	* the user the buffer size needs to be increased.
	81	*/
	82	oprofile_cpu_buffer_inc_smpl_lost();
	83	}
	84	}
	85
	86	/* This function copies the per SPU buffers to the
	87	* OProfile kernel buffer.
	88	*/
	89	void sync_spu_buff(void)
	90	{
	91	int spu;
	92	unsigned long flags;
	93	int curr_head;
	94
	95	for (spu = 0; spu < num_spu_nodes; spu++) {
	96	/* In case there was an issue and the buffer didn't
	97	* get created skip it.
	98	*/
	99	if (spu_buff[spu].buff == NULL)
	100	continue;
	101
102	/* Hold the lock to make sure the head/tail
103	* doesn't change while spu_buff_add() is
104	* deciding if the buffer is full or not.
105	* Being a little paranoid.
106	*/
107	spin_lock_irqsave(&buffer_lock, flags);
108	curr_head = spu_buff[spu].head;
109	spin_unlock_irqrestore(&buffer_lock, flags);
110
111	/* Transfer the current contents to the kernel buffer.
112	* data can still be added to the head of the buffer.
113	*/
114	oprofile_put_buff(spu_buff[spu].buff,
115	spu_buff[spu].tail,
116	curr_head, max_spu_buff);
117
118	spin_lock_irqsave(&buffer_lock, flags);
119	spu_buff[spu].tail = curr_head;
120	spin_unlock_irqrestore(&buffer_lock, flags);
121	}
122
123	}
124
125	static void wq_sync_spu_buff(struct work_struct *work)
126	{
127	/* move data from spu buffers to kernel buffer */
128	sync_spu_buff();
129
130	/* only reschedule if profiling is not done */
131	if (spu_prof_running)
132	schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
133	}
1474855d BN	134
	135	/* Container for caching information about an active SPU task. */
	136	struct cached_info {
	137	struct vma_to_fileoffset_map *map;
	138	struct spu the_spu; / needed to access pointer to local_store */
	139	struct kref cache_ref;
	140	};
	141
	142	static struct cached_info spu_info[MAX_NUMNODES 8];
	143
	144	static void destroy_cached_info(struct kref *kref)
	145	{
	146	struct cached_info *info;
	147
	148	info = container_of(kref, struct cached_info, cache_ref);
	149	vma_map_free(info->map);
	150	kfree(info);
	151	module_put(THIS_MODULE);
	152	}
	153
	154	/* Return the cached_info for the passed SPU number.
	155	* ATTENTION: Callers are responsible for obtaining the
	156	* cache_lock if needed prior to invoking this function.
	157	*/
	158	static struct cached_info get_cached_info(struct spu the_spu, int spu_num)
	159	{
	160	struct kref *ref;
	161	struct cached_info *ret_info;
	162
	163	if (spu_num >= num_spu_nodes) {
	164	printk(KERN_ERR "SPU_PROF: "
	165	"%s, line %d: Invalid index %d into spu info cache\n",
e48b1b45	166	__func__, __LINE__, spu_num);
1474855d BN	167	ret_info = NULL;
	168	goto out;
	169	}
	170	if (!spu_info[spu_num] && the_spu) {
	171	ref = spu_get_profile_private_kref(the_spu->ctx);
	172	if (ref) {
	173	spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
	174	kref_get(&spu_info[spu_num]->cache_ref);
	175	}
	176	}
	177
	178	ret_info = spu_info[spu_num];
	179	out:
	180	return ret_info;
	181	}
	182
	183
	184	/* Looks for cached info for the passed spu. If not found, the
	185	* cached info is created for the passed spu.
	186	* Returns 0 for success; otherwise, -1 for error.
	187	*/
	188	static int
	189	prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
	190	{
	191	unsigned long flags;
	192	struct vma_to_fileoffset_map *new_map;
	193	int retval = 0;
	194	struct cached_info *info;
	195
	196	/* We won't bother getting cache_lock here since
	197	* don't do anything with the cached_info that's returned.
	198	*/
	199	info = get_cached_info(spu, spu->number);
	200
	201	if (info) {
	202	pr_debug("Found cached SPU info.\n");
	203	goto out;
	204	}
	205
	206	/* Create cached_info and set spu_info[spu->number] to point to it.
	207	* spu->number is a system-wide value, not a per-node value.
	208	*/
	209	info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
	210	if (!info) {
	211	printk(KERN_ERR "SPU_PROF: "
	212	"%s, line %d: create vma_map failed\n",
e48b1b45	213	__func__, __LINE__);
1474855d BN	214	retval = -ENOMEM;
	215	goto err_alloc;
	216	}
	217	new_map = create_vma_map(spu, objectId);
	218	if (!new_map) {
	219	printk(KERN_ERR "SPU_PROF: "
	220	"%s, line %d: create vma_map failed\n",
e48b1b45	221	__func__, __LINE__);
1474855d BN	222	retval = -ENOMEM;
	223	goto err_alloc;
	224	}
	225
	226	pr_debug("Created vma_map\n");
	227	info->map = new_map;
	228	info->the_spu = spu;
	229	kref_init(&info->cache_ref);
	230	spin_lock_irqsave(&cache_lock, flags);
	231	spu_info[spu->number] = info;
	232	/* Increment count before passing off ref to SPUFS. */
	233	kref_get(&info->cache_ref);
	234
	235	/* We increment the module refcount here since SPUFS is
	236	* responsible for the final destruction of the cached_info,
	237	* and it must be able to access the destroy_cached_info()
	238	* function defined in the OProfile module. We decrement
	239	* the module refcount in destroy_cached_info.
	240	*/
	241	try_module_get(THIS_MODULE);
	242	spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
	243	destroy_cached_info);
	244	spin_unlock_irqrestore(&cache_lock, flags);
	245	goto out;
	246
	247	err_alloc:
	248	kfree(info);
	249	out:
	250	return retval;
	251	}
	252
	253	/*
	254	* NOTE: The caller is responsible for locking the
	255	* cache_lock prior to calling this function.
	256	*/
	257	static int release_cached_info(int spu_index)
	258	{
	259	int index, end;
	260
	261	if (spu_index == RELEASE_ALL) {
	262	end = num_spu_nodes;
	263	index = 0;
	264	} else {
	265	if (spu_index >= num_spu_nodes) {
	266	printk(KERN_ERR "SPU_PROF: "
	267	"%s, line %d: "
	268	"Invalid index %d into spu info cache\n",
e48b1b45	269	__func__, __LINE__, spu_index);
1474855d BN	270	goto out;
	271	}
	272	end = spu_index + 1;
	273	index = spu_index;
	274	}
	275	for (; index < end; index++) {
	276	if (spu_info[index]) {
	277	kref_put(&spu_info[index]->cache_ref,
	278	destroy_cached_info);
	279	spu_info[index] = NULL;
	280	}
	281	}
	282
	283	out:
	284	return 0;
	285	}
	286
	287	/* The source code for fast_get_dcookie was "borrowed"
	288	* from drivers/oprofile/buffer_sync.c.
	289	*/
	290
	291	/* Optimisation. We can manage without taking the dcookie sem
	292	* because we cannot reach this code without at least one
	293	* dcookie user still being registered (namely, the reader
	294	* of the event buffer).
	295	*/
448678a0	296	static inline unsigned long fast_get_dcookie(struct path *path)
1474855d BN	297	{
	298	unsigned long cookie;
	299
c2452f32	300	if (path->dentry->d_flags & DCACHE_COOKIE)
448678a0 JB	301	return (unsigned long)path->dentry;
448678a0 JB	302	get_dcookie(path, &cookie);
1474855d BN	303	return cookie;
	304	}
	305
	306	/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
	307	* which corresponds loosely to "application name". Also, determine
	308	* the offset for the SPU ELF object. If computed offset is
	309	* non-zero, it implies an embedded SPU object; otherwise, it's a
	310	* separate SPU binary, in which case we retrieve it's dcookie.
	311	* For the embedded case, we must determine if SPU ELF is embedded
	312	* in the executable application or another file (i.e., shared lib).
	313	* If embedded in a shared lib, we must get the dcookie and return
	314	* that to the caller.
	315	*/
	316	static unsigned long
	317	get_exec_dcookie_and_offset(struct spu spu, unsigned int offsetp,
	318	unsigned long *spu_bin_dcookie,
	319	unsigned long spu_ref)
	320	{
	321	unsigned long app_cookie = 0;
	322	unsigned int my_offset = 0;
	323	struct file *app = NULL;
	324	struct vm_area_struct *vma;
	325	struct mm_struct *mm = spu->mm;
	326
	327	if (!mm)
	328	goto out;
	329
	330	down_read(&mm->mmap_sem);
	331
	332	for (vma = mm->mmap; vma; vma = vma->vm_next) {
	333	if (!vma->vm_file)
	334	continue;
	335	if (!(vma->vm_flags & VM_EXECUTABLE))
	336	continue;
448678a0	337	app_cookie = fast_get_dcookie(&vma->vm_file->f_path);
1474855d BN	338	pr_debug("got dcookie for %s\n",
	339	vma->vm_file->f_dentry->d_name.name);
	340	app = vma->vm_file;
	341	break;
	342	}
	343
	344	for (vma = mm->mmap; vma; vma = vma->vm_next) {
	345	if (vma->vm_start > spu_ref \|\| vma->vm_end <= spu_ref)
	346	continue;
	347	my_offset = spu_ref - vma->vm_start;
	348	if (!vma->vm_file)
	349	goto fail_no_image_cookie;
	350
	351	pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
	352	my_offset, spu_ref,
	353	vma->vm_file->f_dentry->d_name.name);
	354	*offsetp = my_offset;
	355	break;
	356	}
	357
448678a0	358	*spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path);
1474855d BN	359	pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
	360
	361	up_read(&mm->mmap_sem);
	362
	363	out:
	364	return app_cookie;
	365
	366	fail_no_image_cookie:
	367	up_read(&mm->mmap_sem);
	368
	369	printk(KERN_ERR "SPU_PROF: "
	370	"%s, line %d: Cannot find dcookie for SPU binary\n",
e48b1b45	371	__func__, __LINE__);
1474855d BN	372	goto out;
	373	}
	374
	375
	376
	377	/* This function finds or creates cached context information for the
	378	* passed SPU and records SPU context information into the OProfile
	379	* event buffer.
	380	*/
	381	static int process_context_switch(struct spu *spu, unsigned long objectId)
	382	{
	383	unsigned long flags;
	384	int retval;
	385	unsigned int offset = 0;
	386	unsigned long spu_cookie = 0, app_dcookie;
	387
	388	retval = prepare_cached_spu_info(spu, objectId);
	389	if (retval)
	390	goto out;
	391
	392	/* Get dcookie first because a mutex_lock is taken in that
	393	* code path, so interrupts must not be disabled.
	394	*/
	395	app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
	396	if (!app_dcookie \|\| !spu_cookie) {
	397	retval = -ENOENT;
	398	goto out;
	399	}
	400
	401	/* Record context info in event buffer */
	402	spin_lock_irqsave(&buffer_lock, flags);
a5598ca0 CL	403	spu_buff_add(ESCAPE_CODE, spu->number);
	404	spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number);
	405	spu_buff_add(spu->number, spu->number);
	406	spu_buff_add(spu->pid, spu->number);
	407	spu_buff_add(spu->tgid, spu->number);
	408	spu_buff_add(app_dcookie, spu->number);
	409	spu_buff_add(spu_cookie, spu->number);
	410	spu_buff_add(offset, spu->number);
	411
	412	/* Set flag to indicate SPU PC data can now be written out. If
	413	* the SPU program counter data is seen before an SPU context
	414	* record is seen, the postprocessing will fail.
	415	*/
	416	spu_buff[spu->number].ctx_sw_seen = 1;
	417
1474855d BN	418	spin_unlock_irqrestore(&buffer_lock, flags);
	419	smp_wmb(); /* insure spu event buffer updates are written */
	420	/* don't want entries intermingled... */
	421	out:
	422	return retval;
	423	}
	424
	425	/*
	426	* This function is invoked on either a bind_context or unbind_context.
	427	* If called for an unbind_context, the val arg is 0; otherwise,
	428	* it is the object-id value for the spu context.
	429	* The data arg is of type 'struct spu *'.
	430	*/
	431	static int spu_active_notify(struct notifier_block *self, unsigned long val,
	432	void *data)
	433	{
	434	int retval;
	435	unsigned long flags;
	436	struct spu *the_spu = data;
	437
	438	pr_debug("SPU event notification arrived\n");
	439	if (!val) {
	440	spin_lock_irqsave(&cache_lock, flags);
	441	retval = release_cached_info(the_spu->number);
	442	spin_unlock_irqrestore(&cache_lock, flags);
	443	} else {
	444	retval = process_context_switch(the_spu, val);
	445	}
	446	return retval;
	447	}
	448
	449	static struct notifier_block spu_active = {
	450	.notifier_call = spu_active_notify,
	451	};
	452
	453	static int number_of_online_nodes(void)
	454	{
	455	u32 cpu; u32 tmp;
	456	int nodes = 0;
	457	for_each_online_cpu(cpu) {
	458	tmp = cbe_cpu_to_node(cpu) + 1;
	459	if (tmp > nodes)
	460	nodes++;
	461	}
	462	return nodes;
	463	}
	464
a5598ca0 CL	465	static int oprofile_spu_buff_create(void)
	466	{
	467	int spu;
	468
	469	max_spu_buff = oprofile_get_cpu_buffer_size();
	470
	471	for (spu = 0; spu < num_spu_nodes; spu++) {
	472	/* create circular buffers to store the data in.
	473	* use locks to manage accessing the buffers
	474	*/
	475	spu_buff[spu].head = 0;
	476	spu_buff[spu].tail = 0;
	477
	478	/*
	479	* Create a buffer for each SPU. Can't reliably
	480	* create a single buffer for all spus due to not
	481	* enough contiguous kernel memory.
	482	*/
	483
	484	spu_buff[spu].buff = kzalloc((max_spu_buff
	485	* sizeof(unsigned long)),
	486	GFP_KERNEL);
	487
	488	if (!spu_buff[spu].buff) {
	489	printk(KERN_ERR "SPU_PROF: "
	490	"%s, line %d: oprofile_spu_buff_create "
	491	"failed to allocate spu buffer %d.\n",
	492	__func__, __LINE__, spu);
	493
	494	/* release the spu buffers that have been allocated */
	495	while (spu >= 0) {
	496	kfree(spu_buff[spu].buff);
	497	spu_buff[spu].buff = 0;
	498	spu--;
	499	}
	500	return -ENOMEM;
	501	}
	502	}
	503	return 0;
	504	}
	505
1474855d BN	506	/* The main purpose of this function is to synchronize
	507	* OProfile with SPUFS by registering to be notified of
	508	* SPU task switches.
	509	*
	510	* NOTE: When profiling SPUs, we must ensure that only
	511	* spu_sync_start is invoked and not the generic sync_start
	512	* in drivers/oprofile/oprof.c. A return value of
	513	* SKIP_GENERIC_SYNC or SYNC_START_ERROR will
	514	* accomplish this.
	515	*/
	516	int spu_sync_start(void)
	517	{
a5598ca0	518	int spu;
1474855d BN	519	int ret = SKIP_GENERIC_SYNC;
	520	int register_ret;
	521	unsigned long flags = 0;
	522
	523	spu_prof_num_nodes = number_of_online_nodes();
	524	num_spu_nodes = spu_prof_num_nodes * 8;
a5598ca0 CL	525	INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff);
	526
	527	/* create buffer for storing the SPU data to put in
	528	* the kernel buffer.
	529	*/
	530	ret = oprofile_spu_buff_create();
	531	if (ret)
	532	goto out;
1474855d BN	533
1474855d BN	534	spin_lock_irqsave(&buffer_lock, flags);
a5598ca0 CL	535	for (spu = 0; spu < num_spu_nodes; spu++) {
	536	spu_buff_add(ESCAPE_CODE, spu);
	537	spu_buff_add(SPU_PROFILING_CODE, spu);
	538	spu_buff_add(num_spu_nodes, spu);
	539	}
1474855d BN	540	spin_unlock_irqrestore(&buffer_lock, flags);
1474855d BN	541
a5598ca0 CL	542	for (spu = 0; spu < num_spu_nodes; spu++) {
	543	spu_buff[spu].ctx_sw_seen = 0;
	544	spu_buff[spu].last_guard_val = 0;
	545	}
	546
1474855d BN	547	/* Register for SPU events */
	548	register_ret = spu_switch_event_register(&spu_active);
	549	if (register_ret) {
	550	ret = SYNC_START_ERROR;
	551	goto out;
	552	}
	553
1474855d BN	554	pr_debug("spu_sync_start -- running.\n");
	555	out:
	556	return ret;
	557	}
	558
	559	/* Record SPU program counter samples to the oprofile event buffer. */
	560	void spu_sync_buffer(int spu_num, unsigned int *samples,
	561	int num_samples)
	562	{
	563	unsigned long long file_offset;
	564	unsigned long flags;
	565	int i;
	566	struct vma_to_fileoffset_map *map;
	567	struct spu *the_spu;
	568	unsigned long long spu_num_ll = spu_num;
	569	unsigned long long spu_num_shifted = spu_num_ll << 32;
	570	struct cached_info *c_info;
	571
	572	/* We need to obtain the cache_lock here because it's
	573	* possible that after getting the cached_info, the SPU job
	574	* corresponding to this cached_info may end, thus resulting
	575	* in the destruction of the cached_info.
	576	*/
	577	spin_lock_irqsave(&cache_lock, flags);
	578	c_info = get_cached_info(NULL, spu_num);
	579	if (!c_info) {
	580	/* This legitimately happens when the SPU task ends before all
	581	* samples are recorded.
	582	* No big deal -- so we just drop a few samples.
	583	*/
	584	pr_debug("SPU_PROF: No cached SPU contex "
	585	"for SPU #%d. Dropping samples.\n", spu_num);
	586	goto out;
	587	}
	588
	589	map = c_info->map;
	590	the_spu = c_info->the_spu;
	591	spin_lock(&buffer_lock);
	592	for (i = 0; i < num_samples; i++) {
	593	unsigned int sample = *(samples+i);
	594	int grd_val = 0;
	595	file_offset = 0;
	596	if (sample == 0)
	597	continue;
	598	file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);
	599
	600	/* If overlays are used by this SPU application, the guard
	601	* value is non-zero, indicating which overlay section is in
	602	* use. We need to discard samples taken during the time
	603	* period which an overlay occurs (i.e., guard value changes).
	604	*/
a5598ca0 CL	605	if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) {
a5598ca0 CL	606	spu_buff[spu_num].last_guard_val = grd_val;
1474855d BN	607	/* Drop the rest of the samples. */
	608	break;
	609	}
	610
a5598ca0 CL	611	/* We must ensure that the SPU context switch has been written
	612	* out before samples for the SPU. Otherwise, the SPU context
	613	* information is not available and the postprocessing of the
	614	* SPU PC will fail with no available anonymous map information.
	615	*/
	616	if (spu_buff[spu_num].ctx_sw_seen)
	617	spu_buff_add((file_offset \| spu_num_shifted),
	618	spu_num);
1474855d BN	619	}
	620	spin_unlock(&buffer_lock);
	621	out:
	622	spin_unlock_irqrestore(&cache_lock, flags);
	623	}
	624
	625
	626	int spu_sync_stop(void)
	627	{
	628	unsigned long flags = 0;
a5598ca0 CL	629	int ret;
	630	int k;
	631
	632	ret = spu_switch_event_unregister(&spu_active);
	633
	634	if (ret)
1474855d	635	printk(KERN_ERR "SPU_PROF: "
a5598ca0 CL	636	"%s, line %d: spu_switch_event_unregister " \
	637	"returned %d\n",
	638	__func__, __LINE__, ret);
	639
	640	/* flush any remaining data in the per SPU buffers */
	641	sync_spu_buff();
1474855d BN	642
	643	spin_lock_irqsave(&cache_lock, flags);
	644	ret = release_cached_info(RELEASE_ALL);
	645	spin_unlock_irqrestore(&cache_lock, flags);
a5598ca0 CL	646
	647	/* remove scheduled work queue item rather then waiting
	648	* for every queued entry to execute. Then flush pending
	649	* system wide buffer to event buffer.
	650	*/
	651	cancel_delayed_work(&spu_work);
	652
	653	for (k = 0; k < num_spu_nodes; k++) {
	654	spu_buff[k].ctx_sw_seen = 0;
	655
	656	/*
	657	* spu_sys_buff will be null if there was a problem
	658	* allocating the buffer. Only delete if it exists.
	659	*/
	660	kfree(spu_buff[k].buff);
	661	spu_buff[k].buff = 0;
	662	}
1474855d BN	663	pr_debug("spu_sync_stop -- done.\n");
	664	return ret;
	665	}
	666