#ifndef LNSST__LNLIBC_CILK_CILK_H
#define LNSST__LNLIBC_CILK_CILK_H 1


#include "../../../lnstdef.h"



/*
	argument/result record shared between a caller and a function started
	through LNSST_CILK_SPAWN

	the caller fills it in, passes it to LNSST_CILK_SPAWN, and later hands the
	same variable to LNSST_CILK_SYNC; the spawned function receives a pointer
	to it as its single void * parameter

	NOTE(review): the member type/name macros are not defined in this file
	(presumably they come from lnstdef.h); the spawn/sync macros in this file
	access members by their plain names (fn_to_call, complete_arg), so the
	name macro is expected to leave the name unchanged -- confirm
*/
struct LNSST_STRUCT_NAME(lnsst_cilk_args) {
	LNSST_STRUCT_MEMBER_TYPE(1, int)
		LNSST_STRUCT_MEMBER_NAME(1, arg)
	;   
		/*
			input: one general-purpose integer argument for the called
			function

			its meaning is defined by the function being called; this member
			only provides a convenient slot for it
		*/

	LNSST_STRUCT_MEMBER_TYPE(2, int)
		LNSST_STRUCT_MEMBER_NAME(2, complete_arg)
	;   
		/*
			completion flag

			the caller sets this to zero before scheduling the function call,
			and the called function sets it to non-zero just before returning

			the lnsst-provided LNSST_CILK_SYNC polls this member, so a called
			function that never sets it will make that sync loop forever
		*/

	LNSST_STRUCT_MEMBER_TYPE(3, void *)
		LNSST_STRUCT_MEMBER_NAME(3, extra_args)
	;   
		/*
			input: optional function-specific extra arguments

			if unused, either set this to LNSST_NULL before scheduling the
			function call, or simply never read it in the called function

			it exists so that it can point to e.g. a structure of any type
			holding however many additional arguments are needed
		*/

	LNSST_STRUCT_MEMBER_TYPE(4, int)
		LNSST_STRUCT_MEMBER_NAME(4, ret_arg)
	;   
		/*
			output: one general-purpose integer "return value" written by the
			called function

			its meaning is defined by the function being called; this member
			only provides a convenient slot for it
		*/

	LNSST_STRUCT_MEMBER_TYPE(5, void)
		LNSST_STRUCT_MEMBER_NAME(5, (*fn_to_call)(void *) )
	;
		/*
			the function to run; the lnsst-provided LNSST_CILK_SPAWN stores
			its fn argument here before scheduling (the system-cilk variant
			of the macro calls fn directly and does not touch this member)

			XXX: the odd-looking "member name" is needed because this is a
			function pointer and our macros are very simple -- the two macros
			together just expand to:

				void (*fn_to_call)(void *);


			the void * parameter is really a:

				struct lnsst_cilk_args *

			it is declared as void * to avoid the struct referring to itself
			here, although a forward declaration of this struct would
			probably be sufficient
		*/
};



/*
	backend selection

	LNSST_USE_SYSTEM_CILK not defined:
		LNSST_USE_LNSST_PROVIDED_CILK ends up defined, and the wrapper macros
		are left to the lnsst-provided section of this header

	LNSST_USE_SYSTEM_CILK defined:
		LNSST_USE_LNSST_PROVIDED_CILK ends up undefined, <cilk/cilk.h> is
		included (unless LNSST_NO_CILK_CILK_H is defined), and the wrapper
		macros map straight onto the cilk keywords
*/
#ifndef LNSST_USE_SYSTEM_CILK


/*
	system cilk was not requested, so fall back to the lnsst-provided
	implementation
*/
#undef LNSST_USE_LNSST_PROVIDED_CILK
#define LNSST_USE_LNSST_PROVIDED_CILK


#else /* LNSST_USE_SYSTEM_CILK is defined */


#undef LNSST_USE_LNSST_PROVIDED_CILK


#ifndef LNSST_NO_CILK_CILK_H
#include <cilk/cilk.h>
#endif /* #ifndef LNSST_NO_CILK_CILK_H */


/*
	NOTE: there is no wrapper for cilk_for, nor any equivalent of it
*/

/*
	spawn: call spawned_fn with the address of the argument structure
*/
#undef LNSST_CILK_SPAWN
#define LNSST_CILK_SPAWN(spawned_fn, args_var) \
	cilk_spawn spawned_fn(&args_var)

/*
	sync: plain cilk_sync; the argument is accepted only so that both
	backends share one calling syntax, and it is not used here
*/
#undef LNSST_CILK_SYNC
#define LNSST_CILK_SYNC(args_var) \
	cilk_sync


#endif /* #ifndef LNSST_USE_SYSTEM_CILK */



#ifdef LNSST_USE_LNSST_PROVIDED_CILK


#include "cilkgfp.h"
#include "../defines/null.h"
#include "../defines/nullfp.h"
#include "../../../libs_s/schdfn/schdfnw.h"
#include "../../../libs_s/schdfn/schdfrn.h"


/*
	LNSST_CILK_SPAWN(fn, lnsst_cilk_args_var) -- lnsst-provided backend

	stores fn in the fn_to_call member of lnsst_cilk_args_var, then passes
	the size and address of lnsst_cilk_args_var to schdfnw() together with
	cilkgfp

	fn
		function to run; it is assigned to fn_to_call, so it must be
		compatible with void (*)(void *)

	lnsst_cilk_args_var
		the argument structure itself (not a pointer to it); any lvalue
		expression works, e.g. a variable name, arr[i] or *p

	use it as a statement, followed by a semicolon

	the body is wrapped in do { } while (0) so that the whole expansion is a
	single statement: without it, "if (c) LNSST_CILK_SPAWN(f, a);" would
	guard only the assignment and always run schdfnw()

	both arguments are parenthesized in the expansion so that an argument
	such as *p binds as intended

	NOTE: lnsst_cilk_args_var is expanded more than once, so do not pass an
	expression with side effects

	NOTE(review): cilkgfp is presumably the scheduled function that ends up
	invoking fn_to_call, and the remaining constant arguments follow the
	schdfnw() parameter list -- confirm against cilkgfp.h and schdfnw.h
*/
#undef LNSST_CILK_SPAWN
#define LNSST_CILK_SPAWN(fn, lnsst_cilk_args_var) \
	do { \
		(lnsst_cilk_args_var).fn_to_call = (fn); \
		schdfnw( \
			cilkgfp, \
			1, \
			sizeof(lnsst_cilk_args_var), \
			&(lnsst_cilk_args_var), \
			0, \
			LNSST_NULL, \
			LNSST_NULL, \
			LNSST_NULLFP \
		); \
	} while (0)

/*
	LNSST_CILK_SYNC(lnsst_cilk_args_var) -- lnsst-provided backend

	waits for the function spawned with lnsst_cilk_args_var to finish: while
	the complete_arg member is zero, schdfrn(LNSST_NULL) is called again; if
	complete_arg is already non-zero, schdfrn() is not called at all

	lnsst_cilk_args_var
		the same argument structure that was given to LNSST_CILK_SPAWN (the
		structure itself, not a pointer); any lvalue expression works, e.g.
		a variable name, arr[i] or *p

	use it as a statement, followed by a semicolon

	the loop is wrapped in do { } while (0) so that the expansion plus the
	caller's semicolon form exactly one statement: a bare while { } followed
	by ";" breaks "if (c) LNSST_CILK_SYNC(a); else ..."

	the argument is parenthesized in the expansion so that an argument such
	as *p binds as intended

	NOTE: the argument is evaluated on every iteration, so do not pass an
	expression with side effects

	NOTE(review): complete_arg is a plain int, so this polling presumably
	relies on the spawned function being run from within schdfrn() on the
	calling thread -- confirm against schdfrn.h
*/
#undef LNSST_CILK_SYNC
#define LNSST_CILK_SYNC(lnsst_cilk_args_var) \
	do { \
		while (! ((lnsst_cilk_args_var).complete_arg) ) { \
			schdfrn(LNSST_NULL); \
		} \
	} while (0)


#endif /* #ifdef LNSST_USE_LNSST_PROVIDED_CILK */



/*
================================================================================
#	example that should work using either system-provided cilk (via clang or
#	gcc or icc targeting x86-32 or x86-64 at least), or by using our scheduled
#	function library -- the macros get translated to the appropriate "backend"
================================================================================


	void parallelize_me(void * vp) {

		struct lnsst_cilk_args * cilk_fn_args_p;


		cilk_fn_args_p = vp;


		# do stuff here
		...


		if (cilk_fn_args_p) {
			(*cilk_fn_args_p).ret_arg = 3;
			(*cilk_fn_args_p).complete_arg = 1;
		}
		return;
	}


================================================================================
#	then, to call this from another function:
================================================================================

	struct lnsst_cilk_args cilk_fn_args_1;
	struct lnsst_cilk_args cilk_fn_args_2;
	struct lnsst_cilk_args cilk_fn_args_3;


	cilk_fn_args_1.arg = 1;
	cilk_fn_args_2.arg = 2;
	cilk_fn_args_3.arg = 3;

	cilk_fn_args_1.complete_arg = 0;
	cilk_fn_args_2.complete_arg = 0;
	cilk_fn_args_3.complete_arg = 0;

	cilk_fn_args_1.extra_args = LNSST_NULL;
	cilk_fn_args_2.extra_args = LNSST_NULL;
	cilk_fn_args_3.extra_args = LNSST_NULL;



	LNSST_CILK_SPAWN(parallelize_me, cilk_fn_args_1);
	LNSST_CILK_SPAWN(parallelize_me, cilk_fn_args_2);
	LNSST_CILK_SPAWN(parallelize_me, cilk_fn_args_3);


	LNSST_CILK_SYNC(cilk_fn_args_1);
	LNSST_CILK_SYNC(cilk_fn_args_2);
	LNSST_CILK_SYNC(cilk_fn_args_3);


================================================================================
#	as the sync calls finish, we can then examine the "return values":
#		cilk_fn_args_1.ret_arg
#		cilk_fn_args_2.ret_arg
#		cilk_fn_args_3.ret_arg
================================================================================
*/


/*
	note: our code should always use

		LNSST_CILK_SPAWN
		LNSST_CILK_SYNC


	we do not provide

		cilk_for
		cilk_spawn
		cilk_sync

	since we wrap them for slightly different syntax -- we do not provide the
	normal syntax, and lnsst code should never use the normal syntax


	our wrapped versions may translate into the normal syntax, or they may
	translate into an lnsst-specific implementation, but we do not use the
	normal syntax directly, in order to allow for this flexibility


	note that we do not provide any

		LNSST_CILK_FOR
*/



#endif /* #ifndef LNSST__LNLIBC_CILK_CILK_H */
