/* $Id: pages.c,v 1.4 2007/01/25 15:13:06 cbass Exp $ */
/*****************************************************************************/
/**
 *  \file pages.c
 *  \brief task, dataspace, page handling 
 * 
 *  \date 12/20/06 20:09:32 CET
 *  \author Sebastian Sumpf <Sebastian.Sumpf@inf.tu-dresden.de>
 */ 
/******************************************************************************/
#include<l4/log/l4log.h>
#include<l4/l4rm/l4rm.h>
#include<l4/util/atomic.h>
#include "__types.h"
#include "__lxcalls.h"


/* Slot of lx_tasks that lx_task_get_frozen() returns; lx_task_init() registers
 * it with L4_INVALID_ID as task id. */
#define TASK_FROZEN  0
/* First slot of lx_tasks that is considered when a task is searched or added
 * without an explicit position. */
#define TASK_BEGIN	 1
 
/* Table of all known tasks, addressed by position. */
static lx_task_t lx_tasks[LX_MAX_INSTANCES];
/* Slab cache all lx_page_t objects of this file are allocated from. */
static l4slab_cache_t page_cache;


/* Page teardown used by the avl iteration callback and the pagefault handler;
 * defined in the pagefault handling section. */
static void __lx_page_del(lx_page_t * page);


static int avl_page_cmp(void * a, void * b)
{
   return ((lx_page_t *)a)->offset - ((lx_page_t *)b)->offset;
}


/**
 * Follow page references until a page object is reached that is not marked
 * as a reference (IS_PAGE_REF) and return it.
 *
 * For a reference page the addr field holds the pointer to the referenced
 * page object. Chains longer than two hops are reported through the error
 * log, but the walk is not aborted.
 *
 * \param page  page object to start from
 * \return      first page object in the chain that is not a reference
 */
static lx_page_t * __page_get_orig(lx_page_t * page)
{
	lx_page_t * cur;
	int hops;

	for(cur = page, hops = 0; IS_PAGE_REF(cur->flags); hops++)
	{
		cur = (lx_page_t *)cur->addr;
		if(hops >= 2)
			LOGd(DBG_ERROR, "Endless page reference page %p, ref: %08lx", cur, cur->addr);
	}

	return cur;
}

/* Callback handed to avl_iterate(): treats the tree node as the lx_page_t it
 * is embedded in and passes it on to __lx_page_del(). */
static void __page_del(avl * node)
{
	lx_page_t * page = (lx_page_t *)node;

	__lx_page_del(page);
}



/*******************************************************************************
 * TASK LEVEL
 ******************************************************************************/


/**
 * Store a task id in the task table.
 *
 * \param task_id  id of the task to register
 * \param pos      slot to use; if negative, the first slot starting at
 *                 TASK_BEGIN is taken that either holds an invalid id or
 *                 already holds task_id
 * \return         slot the id was written to, -L4_ERES if no slot is left
 *
 * An explicitly given (non-negative) pos is used as is.
 */
int lx_task_add_pos(l4_taskid_t task_id, int pos)
{
	if(pos < 0)
	{
		pos = TASK_BEGIN;
		/* the bound has to be tested first, otherwise the table is read one
		 * element past its end when all slots are occupied */
		while(pos < LX_MAX_INSTANCES
				&& !l4_is_invalid_id(lx_tasks[pos].task_id)
				&& !l4_task_equal(task_id, lx_tasks[pos].task_id))
			pos++;
		if(pos >= LX_MAX_INSTANCES)
		{
			LOGd(DBG_ERROR, "Error: Task limit exceeded");
			return -L4_ERES;
		}
	}

	lx_tasks[pos].task_id = task_id;
	return pos;
}

/**
 * Look up a task table entry.
 *
 * \param task_id  id to search for, only used if pos is negative
 * \param pos      slot to return; if negative the table is searched for
 *                 task_id, starting at TASK_BEGIN
 * \param task     receives the address of the table entry on success
 * \return         slot of the entry, -L4_ENOTFOUND if the search failed
 *
 * The slot of the previous successful call is remembered and tried first, so
 * repeated lookups of the same task skip the linear search. A non-negative
 * pos is not range checked.
 */
int lx_task_do_get(l4_taskid_t task_id, int pos, lx_task_t ** task)
{
	static int last = TASK_BEGIN;

	if(pos < 0)
	{
		if(l4_task_equal(task_id, lx_tasks[last].task_id))
			pos = last;
		else
		{
			for(pos = TASK_BEGIN; pos < LX_MAX_INSTANCES; pos++)
			{
				if(l4_task_equal(task_id, lx_tasks[pos].task_id))
					break;
			}
			if(pos >= LX_MAX_INSTANCES)
				return -L4_ENOTFOUND;
		}
	}

	*task = &lx_tasks[pos];

	/* slots below TASK_BEGIN are never remembered as last hit */
	if(pos < TASK_BEGIN)
		last = TASK_BEGIN;
	else
		last = pos;

	return pos;
}

/**
 * Return the task table entry stored at slot TASK_FROZEN.
 *
 * \return  entry delivered by lx_task_get_pos() for TASK_FROZEN
 */
lx_task_t * lx_task_get_frozen(void) 
{
	lx_task_t * frozen;

	lx_task_get_pos(TASK_FROZEN, &frozen);

	return frozen;
}

/**
 * Remove a task: delete all of its dataspaces, release its statistics and
 * close the gap in the task table.
 *
 * \param task  task to delete
 * \param pos   slot of the task in the task table
 */
void lx_task_del(lx_task_t * task, int pos)
{
	/* lx_ds_del() moves the remaining dataspaces up by one, so the next one
	 * to delete is always found in slot 0 */
	for(; task->ds[0].ds_desc != NULL; )
		lx_ds_del(task, dsmlib_get_id(task->ds[0].ds_desc));

	lx_stats_set_free(task);

	/* move all following entries one slot towards the front */
	if(pos < LX_MAX_INSTANCES - 1)
	{
		lx_task_t * dst = &lx_tasks[pos];
		lx_task_t * src = &lx_tasks[pos + 1];

		memmove((void *)dst, (void *)src,
		        (LX_MAX_INSTANCES - pos - 1) * sizeof(lx_task_t));
	}

	/* the slot at the end is unused now */
	lx_tasks[LX_MAX_INSTANCES - 1] = LX_INVALID_TASK;
}

/*******************************************************************************
 * DATASPACE LEVEL
 ******************************************************************************/


/**
 * Look up a dataspace entry of a task.
 *
 * \param task   task that owns the dataspace
 * \param pos    slot to return; if negative the dataspaces of the task are
 *               searched for ds_id
 * \param ds_id  dataspace id to search for, only used if pos is negative
 * \param ds     receives the address of the entry on success
 * \return       slot of the entry, -L4_ENOTFOUND if the search failed
 *
 * The slot of the previous successful call is remembered (for all tasks
 * together) and tried first. The search stops at the first unused entry, the
 * used entries are expected to be stored without gaps. A non-negative pos is
 * not range checked.
 */
int lx_ds_do_get(lx_task_t * task, int pos, unsigned long ds_id,  lx_ds_t ** ds)
{
	static int last = 0;
	
	if(task->ds[last].ds_desc != NULL 
	     && dsmlib_get_id(task->ds[last].ds_desc) == ds_id && pos < 0)
		pos = last;
	else if(pos < 0)
	{
		pos = 0;
		/* the bound has to be tested before the entry is touched, otherwise
		 * a completely filled table is read one element past its end */
		while(pos < LX_MAX_DS
		        && task->ds[pos].ds_desc != NULL
		        && dsmlib_get_id(task->ds[pos].ds_desc) != ds_id)
			pos++;
		
		if(pos >= LX_MAX_DS ||
		   task->ds[pos].ds_desc == NULL)
			return -L4_ENOTFOUND;
	}
	*ds = &(task->ds[pos]);
	last = pos;
	return pos;
}

/**
 * Append a dataspace to the dataspace table of a task.
 *
 * \param task     task the dataspace is added to
 * \param ds       native dataspace
 * \param ds_desc  dataspace descriptor; a slot counts as used as long as its
 *                 descriptor is not NULL
 * \param size     size of the dataspace
 * \return         slot the dataspace was stored in, -L4_ERES if the table of
 *                 the task is full
 *
 * The new entry starts with an empty page tree that is ordered by
 * avl_page_cmp().
 */
int lx_ds_add(lx_task_t * task, l4dm_dataspace_t ds, 
              dsmlib_ds_desc_t * ds_desc, l4_size_t size)
{
	lx_ds_t new_ds = LX_DS_INITIALIZER;
	int i = 0;
	
	/* the bound has to be tested before the entry is touched, otherwise a
	 * completely filled table is read one element past its end */
	while(i < LX_MAX_DS && task->ds[i].ds_desc)
		i++;

	if(i >= LX_MAX_DS)
	{
		LOGd(DBG_ERROR, "Error: Dataspaces limit exceeded for task "l4util_idfmt
		     "\n", l4util_idstr(task->task_id));
		return -L4_ERES;
	}

	LOGd(DBG_SUSP, "Adding dataspace at pos %d ds_id %u native_id %u", 
	     i, dsmlib_get_id(ds_desc), ds.id);
	new_ds.ds_desc = ds_desc;
	new_ds.ds = ds;
	new_ds.size = size;
	new_ds.avl_tree.compar = avl_page_cmp;
	new_ds.avl_tree.root   = 0;
	task->ds[i] = new_ds;	
	return i;
}
	
/**
 * Remove a dataspace from a task.
 *
 * All pages of the dataspace are handed to __page_del(). If the entry holds a
 * valid native dataspace, that dataspace is closed and its descriptor is
 * released. Finally the gap in the dataspace table of the task is closed.
 * Nothing happens if the task has no dataspace with the given id.
 *
 * \param task   task that owns the dataspace
 * \param ds_id  id of the dataspace to remove
 */
void lx_ds_del(lx_task_t * task, unsigned long ds_id)
{
	lx_ds_t * lxds;
	int idx = lx_ds_get(task, ds_id, &lxds);

	if(idx < 0)
		return;

	/* free pages */
	avl_iterate(&lxds->avl_tree, __page_del);

	if(!l4dm_is_invalid_ds(lxds->ds))
	{
		LOGd(DBG_CLOSE, "Closing ds_id %d native_id %d", dsmlib_get_id(lxds->ds_desc),
		     lxds->ds.id);
		l4dm_close(&lxds->ds);
		dsmlib_release_dataspace(lxds->ds_desc);
	}

	/* move all following entries one slot towards the front */
	if(idx < LX_MAX_DS - 1)
	{
		lx_ds_t * dst = &task->ds[idx];
		lx_ds_t * src = &task->ds[idx + 1];

		memmove((void *)dst, (void *)src,
		        (LX_MAX_DS - idx - 1) * sizeof(lx_ds_t));
	}

	task->ds[LX_MAX_DS - 1] = LX_INVALID_DS;
}
 
/**
 * Add a dataspace to the frozen task and register one page object for every
 * page of the memory area [addr, addr + size).
 *
 * \param ds       native dataspace
 * \param ds_desc  dataspace descriptor
 * \param size     size of the area
 * \param addr     start address of the area
 * \return         0 on success, otherwise the negative error code of
 *                 lx_ds_add() or lx_page_add()
 *
 * Pages that were added before an error occurred are not removed again.
 */
int lx_ds_add_addr(l4dm_dataspace_t ds, dsmlib_ds_desc_t * ds_desc, 
                   l4_size_t size, l4_addr_t addr)
{
	lx_task_t * frozen = lx_task_get_frozen();
	lx_ds_t * lxds;
	l4_addr_t cur;
	l4_offs_t offs;
	int rc;

	rc = lx_ds_add(frozen, ds, ds_desc, size);
	if(rc < 0)
		return rc;

	lx_ds_get_pos(frozen, rc, &lxds);

	for(cur = addr; cur < addr + size; cur += L4_PAGESIZE)
	{
		/* offset of the page relative to the start of the area */
		offs = cur - addr;
		rc = lx_page_add(lxds, offs, cur, 0);
		if(rc < 0)
			return rc;

		lx_stats.page_frozen++;
		lx_stats.mem_usage += L4_PAGESIZE;
	}

	return 0;
}
	
/**
 * Attach a dataspace through l4rm_attach() and add the attached area to the
 * frozen task with lx_ds_add_addr().
 *
 * \param ds       dataspace to attach
 * \param ds_desc  dataspace descriptor
 * \param size     size of the dataspace
 * \param addr     address argument passed on to l4rm_attach(), which may
 *                 update it
 * \return         error code of l4rm_attach() if attaching failed, otherwise
 *                 the result of lx_ds_add_addr()
 */
int lx_ds_add_ds(l4dm_dataspace_t ds, dsmlib_ds_desc_t * ds_desc, l4_size_t size, void * addr)
{
	int rc;

	LOGd(DBG_SUSP, "DS size %08x", size);

	rc = l4rm_attach(&ds, size, 0, L4DM_RW|L4RM_MAP|L4RM_LOG2_ALIGNED, &addr);
	if(!rc)
		return lx_ds_add_addr(ds, ds_desc, size, (l4_addr_t)addr);

	LOGd(DBG_ERROR, "Failed to attach dataspace: %s (%d)",
	     l4env_errstr(rc), rc);
	return rc;
}

/**
 * Search the dataspaces of a task for one whose content equals a buffer.
 *
 * Caution: just for continuous memory. The content of a dataspace is compared
 * in one piece, starting at the mapping address of its first page.
 *
 * \param task   task whose dataspaces are searched
 * \param data   buffer to compare with; size rounded up to a multiple of the
 *               page size is read from it
 * \param size   size of the buffer, rounded up to whole pages internally
 * \param ds_id  receives the id of the matching dataspace; untouched if no
 *               dataspace matches
 * \return       0 if a matching dataspace was found, a non-zero value
 *               otherwise
 *
 * Only dataspaces whose size equals the rounded size are compared. The search
 * ends at the first unused entry or at the end of the table.
 */
int lx_ds_search_equal(lx_task_t * task, void * data, l4_size_t size, 
                       unsigned long * ds_id)
{
	int pos;
	int ret = 1;

	lx_ds_t * lxds;
	l4_addr_t map_addr;
	size = (size + L4_PAGESIZE - 1) & L4_PAGEMASK;

	/* stop at the end of the table OR at the first unused entry, and test
	 * the bound before the entry is touched; otherwise the loop runs past
	 * the table and hands NULL descriptors to dsmlib_get_id() */
	for(pos = 0; pos < LX_MAX_DS && task->ds[pos].ds_desc != NULL; pos++)
	{
		lxds = &(task->ds[pos]);
		
		if(lxds->size != size)
			continue;

		if(lx_ds_get_map_addr(task, dsmlib_get_id(lxds->ds_desc), &map_addr) < 0)
			continue;

		ret = memcmp((void*)map_addr, data, size);
		if(!ret) 
		{
			*ds_id = dsmlib_get_id(lxds->ds_desc);
			break;
		}
	}
	
	return ret;
}

/**
 * Retrieve the mapping address of the first page (offset 0) of a dataspace.
 *
 * \param task   task that owns the dataspace
 * \param ds_id  id of the dataspace
 * \param addr   receives the mapping address on success
 * \return       0 on success, the negative result of lx_ds_get() if the
 *               dataspace is unknown, or the result of lx_page_get() if the
 *               dataspace has no page at offset 0
 */
int lx_ds_get_map_addr(lx_task_t * task, unsigned long ds_id,
                       l4_addr_t * addr)
{
	lx_ds_t * lxds;
	lx_page_t * first;
	int rc;

	rc = lx_ds_get(task, ds_id, &lxds);
	if(rc < 0)
		return rc;

	rc = lx_page_get(lxds, 0, &first);
	if(rc != 0)
		return rc;

	*addr = lx_page_get_map_addr(first);
	return 0;
}


/*******************************************************************************
 * PAGE LEVEL
 ******************************************************************************/


/**
 * Allocate a page object and insert it into the page tree of a dataspace.
 *
 * \param lxds   dataspace the page belongs to
 * \param offs   offset of the page, used as key in the page tree
 * \param addr   value stored in the addr field of the page
 * \param flags  initial flags of the page; the reference count kept in the
 *               flags is incremented once
 * \return       0 on success, -L4_ENOMEM if no page object could be allocated
 *
 * If lx_frozen is set and the passed flags do not mark the page as locked,
 * the page is additionally registered with lx_hash_register_page().
 */
int lx_page_add(lx_ds_t * lxds, l4_offs_t offs, l4_addr_t addr, 
                l4_uint32_t flags)
{
	lx_page_t * fresh = lx_slab_alloc(&page_cache);

	if(fresh == NULL)
		return -L4_ENOMEM;

	fresh->flags  = flags;
	fresh->addr   = addr;
	fresh->offset = offs;
	INC_PAGE_REF_CNT(fresh->flags);

	avl_insert(&lxds->avl_tree, (avl *)fresh);

	/* only unlocked pages are hashed, and only once lx_frozen is set */
	if(!lx_frozen || IS_LOCKED_PAGE(flags))
		return 0;

	lx_hash_register_page(lxds, fresh);
	return 0;
}


/**
 * Look up the page object stored for an offset in the page tree of a
 * dataspace.
 *
 * \param lxds  dataspace to search
 * \param offs  offset of the page
 * \param page  receives the page object, or NULL if there is none
 * \return      0 if a page was found, -L4_ENOTFOUND otherwise
 */
int lx_page_get(lx_ds_t * lxds, l4_offs_t offs, lx_page_t ** page) 
{
	lx_page_t key;
	lx_page_t * found;

	/* the tree is ordered by offset only, so that is all the key needs */
	key.offset = offs;

	found = (lx_page_t *)avl_get_first(&lxds->avl_tree, (avl *)&key);
	*page = found;

	return (found == NULL) ? -L4_ENOTFOUND : 0;
}


/**
 * Remove the page stored for an offset from the page tree of a dataspace and
 * give its page object back to the page cache. Nothing happens if there is no
 * page for the offset.
 *
 * \param lxds  dataspace the page belongs to
 * \param offs  offset of the page
 */
void lx_page_del(lx_ds_t * lxds, l4_offs_t offs)
{
	lx_page_t * victim;

	if(lx_page_get(lxds, offs, &victim) == 0)
	{
		avl_remove(&lxds->avl_tree, (avl *)victim);
		lx_slab_free(&page_cache, (void *)victim);
	}
}

/**
 * Determine the address a page is mapped from.
 *
 * Page references are followed to the page object that is not a reference;
 * chains longer than two hops are reported through the error log. If
 * lx_cow_chk_moved() delivers a new location for the address of that page,
 * the page object is updated to the new location first.
 *
 * \param page  page object, may be a reference to another page
 * \return      addr field of the resolved page object
 */
l4_addr_t lx_page_get_map_addr(lx_page_t * page)
{
	lx_page_t * cur;
	void * relocated;
	int hops;

	for(cur = page, hops = 0; IS_PAGE_REF(cur->flags); hops++)
	{
		cur = (lx_page_t *)cur->addr;
		if(hops >= 2)
			LOGd(DBG_ERROR, "Endless page reference page %p, ref: %08lx", cur, cur->addr);
	}

	relocated = lx_cow_chk_moved(cur->addr);
	if(relocated != NULL)
		cur->addr = (l4_addr_t)relocated;

	return (l4_addr_t)cur->addr;
}

/**
 * Give a page object back to the page cache; the address stored in the page
 * is logged before.
 *
 * \param page  page object to release
 */
void lx_page_release(lx_page_t * page)
{
	void * obj = (void *)page;

	LOGd(DBG_HASH, "Releasing invalid page object: %08lx", page->addr);
	lx_slab_free(&page_cache, obj);
}	


/*******************************************************************************
 * PAGEFAULT HANDLING
 ******************************************************************************/


/**
 * Internal page teardown for copy on write handling.
 *
 * Drops one reference from the page. What happens then depends on lx_frozen:
 *  - lx_frozen not set: the page is unmapped from all address spaces and the
 *    page object is given back to the page cache.
 *  - lx_frozen set, page is no reference and its count reached zero: the
 *    memory behind the page is handed to lx_cow_set_free() and the page is
 *    marked invalid; the page object itself is not freed here.
 *  - lx_frozen set, page is a reference: one reference is dropped from the
 *    original page as well, and the reference page object is given back to
 *    the page cache.
 *  - lx_frozen set, page is no reference and still has references: only the
 *    count is decremented.
 *
 * \param page  page to tear down
 */
static void __lx_page_del(lx_page_t * page)
{
	lx_page_t * page_orig;
	
	//free possible cow data
	DEC_PAGE_REF_CNT(page->flags);
	//LOGd(DBG_CLEAN, "Deleting page at %08lx\n", page->addr);	
	if(lx_frozen)
	{
		/* last user of a page that owns its memory */
		if(GET_PAGE_REF_CNT(page->flags) == 0 &&
			!IS_PAGE_REF(page->flags))
		{
			lx_cow_set_free(lx_page_get_map_addr(page));
			l4util_dec32(&(lx_stats.page_copied));
			/* page object will be freed by hashing-thread */
			SET_INVALID(page->flags);
		}
		else if(IS_PAGE_REF(page->flags))
		{
			/* the reference goes away, so the original loses one user */
			page_orig = __page_get_orig(page);
			DEC_PAGE_REF_CNT(page_orig->flags);
			if(GET_PAGE_REF_CNT(page_orig->flags) == 0)
			{
				/* lx_page_get_map_addr() resolves the reference, so this
				 * frees the memory of the original page */
				lx_cow_set_free(lx_page_get_map_addr(page));
				l4util_dec32(&(lx_stats.page_copied));
				/* page object will be freed by hashing-thread */
				/* NOTE(review): this marks the reference page invalid, which
				 * is freed a few lines below, while page_orig is neither
				 * marked nor freed here - presumably page_orig->flags was
				 * meant; confirm against the hashing thread */
				SET_INVALID(page->flags);
			}
			lx_stats.page_ref--;
			LOGd(DBG_CLEAN, "REF PAGE: offset %08lx map_addr %08lx ref_cnt %d", 
			     page->offset, page->addr, GET_PAGE_REF_CNT(page->flags));
			lx_slab_free(&page_cache, (void*)page);
		}
	}
	else 
	{
		/* revoke the mapping of the page in all address spaces */
		l4_fpage_unmap(l4_fpage(page->addr, L4_LOG2_PAGESIZE, 0, 0),
		                        L4_FP_FLUSH_PAGE | L4_FP_ALL_SPACES);
		lx_slab_free(&page_cache, (void*)page);
	}
}


/**
 * Return the mapping address of a page of the frozen task.
 *
 * \param ds_id   id of the dataspace
 * \param offset  offset of the page inside the dataspace
 * \return        mapping address of the page
 *
 * A missing dataspace or page is logged and the kernel debugger is entered;
 * there is no error return.
 */
static l4_addr_t __page_get_frozen(unsigned long ds_id, unsigned long offset)
{
	lx_ds_t * lxds;
	lx_page_t * page;
	int rc;

	if((rc = lx_ds_get(lx_task_get_frozen(), ds_id, &lxds)) < 0)
	{
		LOGd(DBG_ERROR, "No such dataspace ds: %lu offset: %08lx",
		     ds_id, offset);
		enter_kdebug("Segmentation fault");
	}

	if((rc = lx_page_get(lxds, offset, &page)) < 0)
	{
		LOGd(DBG_ERROR, "No such page ds: %lu offset: %08lx",
		     ds_id, offset);
		enter_kdebug("Segmentation fault");
	}

	return lx_page_get_map_addr(page);
}

/**
 * Copy one page into memory obtained from lx_cow_get_free().
 *
 * \param from  source page, L4_PAGESIZE bytes are read
 * \param to    receives the address of the copy; set to NULL on failure
 * \return      0 on success, -L4_ENOMEM if no memory for the copy was
 *              available
 *
 * The global counter of copied pages is incremented for every copy.
 */
static int __page_copy(void * from, void ** to)
{
	void * dst = lx_cow_get_free();

	*to = dst;
	if(dst == NULL)
		return -L4_ENOMEM;

	memcpy(dst, (void *)from, L4_PAGESIZE);
	l4util_inc32(&lx_stats.page_copied);

	return 0;
}

/**
 * Resolve a pagefault of a task and deliver the address to map.
 *
 * As long as lx_frozen is not set, the page of the frozen task is returned.
 * Afterwards read faults are answered with the task's own page if it has one
 * and with the frozen page otherwise. Write faults get a private copy:
 *  - no page for the task yet: copy the frozen page and add it to the task
 *    (the dataspace entry is created on demand),
 *  - the page is a reference to another page: copy the original, turn the
 *    reference into a normal page and drop one reference from the original,
 *  - the page is referenced by others: copy it and replace it in the tree of
 *    the task, the original stays alive for its other users,
 *  - otherwise the page is already private and is returned as is.
 *
 * \param src       faulting thread, used to look up the task
 * \param ds_id     id of the dataspace the fault occurred in
 * \param offset    offset of the fault, rounded down to the page start
 * \param rw        zero for a read fault, non-zero for a write fault
 * \param map_addr  receives the address to map; set to 0 first
 * \return          0 on success, a negative error code otherwise
 *
 * Every error path releases the semaphores taken so far and leaves the
 * reference counts and the page tree as they were before the failing step.
 */
int
lx_page_fault(l4_threadid_t src, 
              unsigned long ds_id, 
              unsigned long offset, 
              int rw,
              l4_addr_t * map_addr)
{
	lx_task_t * task;
	lx_ds_t * lxds, * lxds_frozen;
	lx_page_t * page, * page_orig;
	lx_task_stats_t * task_stats;
	int error, pos;
	void * new_addr;
	
	
	*map_addr = 0;	
	offset &= L4_PAGEMASK;
	
	/* if there is no complete image yet, return page */
	if(!lx_frozen)
	{
		*map_addr = __page_get_frozen(ds_id, offset);
		return 0;
	}
	
	if((error = lx_task_get(src, &task)) < 0)
	{
		/* OOps */
		LOGd(DBG_ERROR, "No such task "l4util_idfmt, l4util_idstr(src));
		enter_kdebug("Alien task");
		return error;
	}
	
	lx_stats_get_task(task, &task_stats);

	error = lx_ds_get(task, ds_id, &lxds);
	if(error == -L4_ENOTFOUND)
	{
		/* ro fault, no data, return frozen page */
		if(!rw) {
			*map_addr = __page_get_frozen(ds_id, offset);

			return 0;
		}

		/* add dataspace to task; both steps can fail and would leave
		 * lxds_frozen respectively lxds uninitialised */
		error = lx_ds_get(lx_task_get_frozen(), ds_id, &lxds_frozen);
		if(error < 0)
		{
			LOGd(DBG_ERROR, "No such dataspace ds: %lu offset: %08lx",
			     ds_id, offset);
			return error;
		}

		pos = lx_ds_add(task, L4DM_INVALID_DATASPACE, lxds_frozen->ds_desc, 
		                lxds_frozen->size);
		if(pos < 0)
			return pos;

		lx_ds_get_pos(task, pos, &lxds);
	}

	
	l4semaphore_down(&(lxds->sem));
	error = lx_page_get(lxds, offset, &page);
	if(error == -L4_ENOTFOUND)
	{
		*map_addr = __page_get_frozen(ds_id, offset);
	
		/* ro fault, no data, return frozen page */
		if(!rw)
		{
			l4semaphore_up(&(lxds->sem));
			return 0;
		}
		/* no page, copy from frozen */	
		if((error = __page_copy((void*)*map_addr, &new_addr)))
		{	
			l4semaphore_up(&(lxds->sem));
			return error;
		}
		
		*map_addr = (l4_addr_t)new_addr;

		l4util_inc32(&(task_stats->page_cnt));

		error = lx_page_add(lxds, offset, *map_addr, 0);
		l4semaphore_up(&(lxds->sem));
		return error;
	}
	
	if(!rw)
	{
		*map_addr = lx_page_get_map_addr(page);
		l4semaphore_up(&(lxds->sem));
		return 0;
	}

	LOGd(DBG_SEM, "PAGE downing %p", lxds);

	/* if we get a page reference, decrement refcounter at original */
	if(IS_PAGE_REF(page->flags))
	{
		l4semaphore_down(&lx_orig_sem);
		page_orig = __page_get_orig(page);

		/* copy first: if it fails the original keeps its reference and
		 * both semaphores are released again */
		if((error = __page_copy(
		   (void *)lx_page_get_map_addr(page_orig), &new_addr)))
		{
			l4semaphore_up(&lx_orig_sem);
			l4semaphore_up(&(lxds->sem));
			return error;
		}

		DEC_PAGE_REF_CNT(page_orig->flags);

		/* clear if there are no references to original page */
		if(GET_PAGE_REF_CNT(page_orig->flags) == 0)
		{
			/* __lx_page_del() drops one reference itself */
			INC_PAGE_REF_CNT(page_orig->flags);
			__lx_page_del(page_orig);
		}
		
		l4util_dec32(&lx_stats.page_ref);
		l4semaphore_up(&lx_orig_sem);
		
		*map_addr = page->addr = (l4_addr_t)new_addr;
		UNSET_PAGE_REF(page->flags);
		UNSET_MAPPED_RO(page->flags);

		LOGd(DBG_PAGEHANDLING, 
		     "Copying referenced orig page addr: %08lx (from %08lx) ref count: %d",
		     *map_addr, page_orig->addr, GET_PAGE_REF_CNT(page_orig->flags) + 1);
		
		lx_hash_register_page(lxds, page);
	}
	/* page is referenced by others, copy, original will be an orphan */
	else if(GET_PAGE_REF_CNT(page->flags) > 1)
	{
		page_orig = page;

		/* copy first: if it fails the original stays in the tree with an
		 * unchanged reference count */
		if((error = __page_copy(
			(void*)lx_page_get_map_addr(page_orig), &new_addr)))
		{	
			LOGd(DBG_SEM,"PAGE up");
			l4semaphore_up(&(lxds->sem));
			return error;
		}

		DEC_PAGE_REF_CNT(page->flags);
		avl_remove(&(lxds->avl_tree), (avl *)page_orig);
		
		/* NOTE(review): if adding the new page fails, the original is
		 * already detached and the copy is not given back */
		if((error = lx_page_add(lxds, offset, (l4_addr_t)new_addr, 0)))
		{	
			LOGd(DBG_SEM,"PAGE up");
			l4semaphore_up(&(lxds->sem));
			return error;
		}
		
		l4util_inc32(&(task_stats->page_cnt));

		*map_addr = (l4_addr_t)new_addr;
		LOGd(DBG_PAGEHANDLING, "Copy from ALREADY addr %08lx (from %08lx) "
		     "referenced page %d",
		     *map_addr, page_orig->addr, GET_PAGE_REF_CNT(page->flags) + 1);
	}
	/* is rw page, reset RO flag */
	else {
		*map_addr = lx_page_get_map_addr(page);
		UNSET_MAPPED_RO(page->flags);
	}
	LOGd(DBG_SEM,"PAGE up");
	l4semaphore_up(&(lxds->sem));

	if(!*map_addr)
		enter_kdebug("Page miss");

	return 0;
}


/*******************************************************************************
 * INITIALIZATION
 ******************************************************************************/


/**
 * Initialise the task table and the page cache.
 *
 * Every task entry and every dataspace entry is set to its initializer, with
 * an empty page tree ordered by avl_page_cmp(). Slot TASK_FROZEN is
 * registered with L4_INVALID_ID as task id, and the slab cache for page
 * objects is set up.
 *
 * \return  result of lx_slab_setup()
 */
int lx_task_init(void)
{
	int t, d;

	for(t = 0; t < LX_MAX_INSTANCES; t++)
	{
		lx_task_t empty_task = LX_TASK_INITIALIZER;

		lx_tasks[t] = empty_task;

		for(d = 0; d < LX_MAX_DS; d++)
		{
			lx_ds_t empty_ds = LX_DS_INITIALIZER;

			empty_ds.avl_tree.root   = 0;
			empty_ds.avl_tree.compar = avl_page_cmp;
			lx_tasks[t].ds[d] = empty_ds;
		}
	}

	lx_task_add_pos(L4_INVALID_ID, TASK_FROZEN);

	return lx_slab_setup(&page_cache, sizeof(lx_page_t));
}

