#include "CudaVersionCheck.h"
#include "LpvStructs.h"
#include "LpvUtils.h"

#include "../Defines.h"
#include "cutil_math.h"
#include <math_constants.h>

/**
 * Copies colour data from the `source` volume into cells of the `target` volume.
 *
 * Each thread handles the target cell addressed by its global 3D thread index.
 * Target cell (x, y, z) reads from source cell (x/2, y/2, z/2). A cell is only
 * overwritten when its entry in `ix` differs from `level`; cells whose entry
 * equals `level` keep their current red/green/blue values.
 *
 * source  volume that is read (indexed using source.size)
 * target  volume that is written (indexed using target.size)
 * ix      one char per target cell, addressed with the target volume index
 * size    not used by the kernel
 * level   value compared against the per-cell entry in `ix`
 *
 * NOTE(review): the source cell is not bounds-checked here; this presumably
 * relies on the source extent being at least half the target extent — confirm.
 */
__global__ void deviceMergeVolumes(LPVComponents source, LPVComponents target, char* ix, int size, int level) {
	int cellX = blockIdx.x * blockDim.x + threadIdx.x;
	int cellY = blockIdx.y * blockDim.y + threadIdx.y;
	int cellZ = blockIdx.z * blockDim.z + threadIdx.z;

	int3 targetPos = make_int3(cellX, cellY, cellZ);

	// Threads whose index lies outside the target volume have nothing to do.
	if(!isInside(target.size, targetPos)) {
		return;
	}

	int targetIndex = makeVolumeIndex(target.size, targetPos);

	// Cells marked with this level in `ix` are left untouched.
	if(ix[targetIndex] == level) {
		return;
	}

	// Halving the coordinates selects the source cell for this target cell.
	int3 sourcePos = make_int3(cellX / 2, cellY / 2, cellZ / 2);
	int sourceIndex = makeVolumeIndex(source.size, sourcePos);

	target.red[targetIndex] = source.red[sourceIndex];
	target.green[targetIndex] = source.green[sourceIndex];
	target.blue[targetIndex] = source.blue[sourceIndex];
}

/**
 * Host wrapper that launches deviceMergeVolumes for one level.
 *
 * The target volume has an edge length of `size >> level` and the source
 * volume an edge length of `size >> (level + 1)`. One thread is launched per
 * target cell, in blocks of BLOCKSIZE^3 threads.
 *
 * source  three pointers (red, green, blue), each reinterpreted as float4*
 * target  three pointers (red, green, blue), each reinterpreted as float4*
 * ix      reinterpreted as char*; read by the kernel once per target cell
 * size    edge length that is shifted right to obtain both volume sizes
 * level   level of the target volume; also forwarded to the kernel
 *
 * All pointers are handed to the kernel unchanged, so they must be
 * dereferenceable from device code.
 *
 * NOTE(review): the launch result is not checked here; a caller that needs to
 * detect launch failures has to query cudaGetLastError() itself.
 */
void cudaMergeVolumes(void* source[3], void* target[3], void* ix, int size, int level) {

	int3 sourceSize = make_int3(size >> (level + 1));
	int3 targetSize = make_int3(size >> level);

	// An empty target volume has no cells to write, and a grid dimension of
	// zero would be an invalid launch configuration, so bail out early.
	if(targetSize.x <= 0 || targetSize.y <= 0 || targetSize.z <= 0) {
		return;
	}

	LPVComponents src;
	src.size = sourceSize;
	src.red = (float4*)source[0];
	src.green = (float4*)source[1];
	src.blue = (float4*)source[2];
	
	LPVComponents trg;
	trg.size = targetSize;
	trg.red = (float4*)target[0];
	trg.green = (float4*)target[1];
	trg.blue = (float4*)target[2];
	
	dim3 dimBlock(BLOCKSIZE, BLOCKSIZE, BLOCKSIZE);
	// Ceiling division: just enough blocks to cover the target volume, without
	// an extra layer of idle blocks when the extent is a multiple of BLOCKSIZE.
	dim3 dimGrid((targetSize.x + BLOCKSIZE - 1) / BLOCKSIZE,
	             (targetSize.y + BLOCKSIZE - 1) / BLOCKSIZE,
	             (targetSize.z + BLOCKSIZE - 1) / BLOCKSIZE);
	
	deviceMergeVolumes<<<dimGrid, dimBlock>>>(src, trg, (char*)ix, size, level);
}
