#include "CudaVersionCheck.h"
#include "LpvStructs.h"
#include "LpvUtils.h"

#include "../Defines.h"
#include "cutil_math.h"
#include <math_constants.h>

/**
 * Downsamples a 3D volume of float4 values by a factor of two per axis.
 *
 * Each thread handles one cell of the target volume: it averages the
 * 2x2x2 block of source cells that maps onto it and stores the result in
 * `target`. If the averaged value is not zero (as judged by isZero()), the
 * matching entry of `ix` is lowered to `level + 1` when that is smaller than
 * the value already stored there.
 *
 * Launch layout: a 3D grid of 3D blocks supplying at least one thread per
 * target cell; surplus threads exit without touching memory.
 *
 * @param source  volume at resolution (size >> level) per axis, x-fastest layout
 * @param target  volume at resolution (size >> (level + 1)) per axis, x-fastest layout
 * @param ix      per-target-cell level markers, indexed like `target`
 * @param size    edge length of the level-0 volume
 * @param level   level of `source`; `target` is level + 1
 *
 * NOTE(review): the sum is kept in a local variable and written once, so
 * `source` and `target` are assumed not to overlap — confirm against callers.
 */
__global__ void deviceDownsampleGeometry(float4* source, float4* target, char* ix, int size, int level) {
	const int idx = blockIdx.x * blockDim.x + threadIdx.x;
	const int idy = blockIdx.y * blockDim.y + threadIdx.y;
	const int idz = blockIdx.z * blockDim.z + threadIdx.z;
	
	// Per-axis extents of the source (sz) and target (tz) volumes.
	const int3 sz = make_int3(size >> level);
	const int3 tz = make_int3(size >> (level + 1));
	
	// Threads outside the target volume have nothing to do.
	if (idx >= tz.x || idy >= tz.y || idz >= tz.z) {
		return;
	}
	
	// Flat index of this thread's target cell (shared by target and ix).
	const int cell = idz*tz.y*tz.x + idy*tz.x + idx;
	
	// Accumulate locally instead of through a reference into global memory:
	// the compiler cannot rule out aliasing between source and target, so a
	// reference would force a global load/store on every addition.
	float4 acc = make_float4(0.0f);
	
	for(int z = 2*idz; z < 2*idz+2; z++) {
		for(int y = 2*idy; y < 2*idy+2; y++) {
			for(int x = 2*idx; x < 2*idx+2; x++) {
				acc += source[z*sz.y*sz.x + y*sz.x + x];
			}
		}
	}
	
	// Mean of the eight contributing source cells.
	acc *= 1.0f/8.0f;
	target[cell] = acc;
	
	if(!isZero(acc)) {
		// Keep the smallest level recorded so far for this cell.
		char& oldValue = ix[cell];
		oldValue = min(oldValue, level + 1);
	}
}


/**
 * Host-side launcher for deviceDownsampleGeometry.
 *
 * Launches a 3D grid of BLOCKSIZE^3-thread blocks that covers the target
 * volume, whose edge length is (size >> (level + 1)). The launch is issued
 * on the default stream and is not synchronized or error-checked here.
 *
 * @param source  device pointer to the float4 volume at `level`
 * @param target  device pointer to the float4 volume at `level + 1`
 * @param ix      device pointer to the char level markers
 * @param size    edge length of the level-0 volume
 * @param level   level of `source`
 */
void cudaDownsampleGeometry(void* source, void* target, void* ix, int size, int level) {

	// The kernel uses one thread per *target* cell, so the grid is sized from
	// the target resolution rather than the (twice as large) source resolution.
	const int toSize = size >> (level + 1);
	if (toSize <= 0) {
		// Empty target volume: the kernel would not write anything.
		return;
	}
	
	const int block = BLOCKSIZE;
	// Ceiling division so a partially filled last block is still launched,
	// without adding a spare block when toSize is a multiple of BLOCKSIZE.
	const int blocksPerAxis = (toSize + block - 1) / block;
	
	dim3 dimBlock(block, block, block);
	dim3 dimGrid(blocksPerAxis, blocksPerAxis, blocksPerAxis);
	
	deviceDownsampleGeometry<<<dimGrid, dimBlock>>>((float4*)source, (float4*)target, (char*)ix, size, level);
}
