#include "CudaVersionCheck.h"
#include "LpvStructs.h"
#include "LpvUtils.h"

#include "../Defines.h"
#include "cutil_math.h"
#include <math_constants.h>

/**
 * Downsamples the three colour channels of a volume by a factor of two per axis.
 *
 * Each thread handles one target cell (idx, idy, idz): it averages the
 * 2x2x2 group of source cells starting at (2*idx, 2*idy, 2*idz) and writes
 * the result to target.red / target.green / target.blue.
 *
 * Launch layout: 3D grid of 3D blocks covering at least target.size threads;
 * surplus threads exit without touching memory. No shared memory is used.
 *
 * Preconditions: source.size must be at least 2 * target.size in every
 * dimension, since cells up to index 2*id+1 are read from the source.
 *
 * @param source  volume to read from (size plus red/green/blue arrays)
 * @param target  volume to write to (size plus red/green/blue arrays)
 * @param ix      one char per target cell; lowered to min(old, level + 1)
 *                when any averaged channel is non-zero according to isZero()
 *                (presumably a per-cell "finest occupied level" index --
 *                TODO confirm against the caller)
 * @param size    not used by this kernel
 * @param level   level of the source volume; level + 1 is stored into ix
 */
__global__ void deviceDownsampleVolumes(LPVComponents source, LPVComponents target, char* ix, int size, int level) {
	const int idx = blockIdx.x * blockDim.x + threadIdx.x;
	const int idy = blockIdx.y * blockDim.y + threadIdx.y;
	const int idz = blockIdx.z * blockDim.z + threadIdx.z;
	
	const int3 tz = target.size;
	const int3 sz = source.size;
	
	// The grid may be larger than the target volume; surplus threads do nothing.
	if (idx >= tz.x || idy >= tz.y || idz >= tz.z) {
		return;
	}
	
	// Flat index of this thread's target cell (x fastest, then y, then z).
	const int targetIndex = idz*tz.y*tz.x + idy*tz.x + idx;
	
	// Accumulate in locals rather than through references into global memory,
	// so each target channel is written exactly once instead of being
	// read-modified-written on every loop iteration.
	float4 shr = make_float4(0.0f);
	float4 shg = make_float4(0.0f);
	float4 shb = make_float4(0.0f);
	
	for(int z = 2*idz; z < 2*idz+2; z++) {
		for(int y = 2*idy; y < 2*idy+2; y++) {
			for(int x = 2*idx; x < 2*idx+2; x++) {
				const int i3d = z*sz.y*sz.x + y*sz.x + x;
				shr += source.red[i3d];
				shg += source.green[i3d];
				shb += source.blue[i3d];
			}
		}
	}
	
	// Average over the eight contributing source cells.
	shr *= 1.0f/8.0f;
	shg *= 1.0f/8.0f;
	shb *= 1.0f/8.0f;
	
	target.red[targetIndex] = shr;
	target.green[targetIndex] = shg;
	target.blue[targetIndex] = shb;
	
	// Only cells that received some non-zero contribution update the index volume.
	if(!isZero(shr) || !isZero(shg) || !isZero(shb)) {
		char& oldValue = ix[targetIndex];
		oldValue = min(oldValue, level + 1);
	}
}


/**
 * Host-side launcher for deviceDownsampleVolumes.
 *
 * Treats the source as a cubic volume with edge length (size >> level) and
 * the target as a cubic volume with edge length (size >> (level + 1)), then
 * launches one thread per target cell.
 *
 * @param sources  three pointers (red, green, blue) passed to the kernel as
 *                 float4 arrays for the source volume
 * @param targets  three pointers (red, green, blue) passed to the kernel as
 *                 float4 arrays for the target volume
 * @param ix       pointer passed to the kernel as a char array, indexed per
 *                 target cell
 * @param size     edge length of the level-0 volume
 * @param level    level of the source volume; the target is level + 1
 *
 * The launch is asynchronous and its result is not checked here; callers
 * that need to detect launch failures should query cudaGetLastError().
 */
void cudaDownsampleVolumes(void* sources[3], void* targets[3], void* ix, int size, int level) {

	int3 fromSize = make_int3(size >> level);
	int3 toSize = make_int3(size >> (level + 1));

	// An empty target has no cells to write, and a zero-sized grid would be
	// an invalid launch configuration, so there is nothing to do.
	if (toSize.x <= 0 || toSize.y <= 0 || toSize.z <= 0) {
		return;
	}

	LPVComponents src;
	src.size = fromSize;
	src.red = (float4*)sources[0];
	src.green = (float4*)sources[1];
	src.blue = (float4*)sources[2];
	
	LPVComponents trg;
	trg.size = toSize;
	trg.red = (float4*)targets[0];
	trg.green = (float4*)targets[1];
	trg.blue = (float4*)targets[2];
	
	// The kernel only does work for threads inside the target volume, so the
	// grid is sized from toSize (rounded up to whole blocks) rather than from
	// the eight-times-larger source volume.
	dim3 dimBlock(BLOCKSIZE, BLOCKSIZE, BLOCKSIZE);
	dim3 dimGrid((toSize.x + BLOCKSIZE - 1) / BLOCKSIZE,
	             (toSize.y + BLOCKSIZE - 1) / BLOCKSIZE,
	             (toSize.z + BLOCKSIZE - 1) / BLOCKSIZE);
	
	deviceDownsampleVolumes<<<dimGrid, dimBlock>>>(src, trg, (char*)ix, size, level);
}
