import "LibDICOM.dll"
import "Quasar.Video.dll"
import "system.q"
import "inttypes.q"

type ray : class
    src : vec3
    dir : vec3
end

type ray_segment : mutable class
    near : scalar
    far : scalar
    near_intensity : scalar
end

type box : class
    min : vec3
    max : vec3
end

type line_segment : mutable class
    near : vec3
    far : vec3
    near_dim : int
    is_intersecting : int
end

% Function: box_contains
%
% Checks whether the specified box contains a given point.
%
function y : int = __device__ box_contains(b : box, p : vec3)
    y = b.min[0] <= p[0] && _
        b.min[1] <= p[1] && _
        b.min[2] <= p[2] && _
        b.max[0] >= p[0] && _
        b.max[1] >= p[1] && _
        b.max[2] >= p[2]
        
end

% Function: intersectswith
%
% Checks whether a ray intersects with a given box (and also
% computes the intersection)
%
function [ls:line_segment] = __device__ intersectswith(b : box, r : ray)

    v1 = [r.dir[0]<0.0 ? b.max[0] : b.min[0], _
          r.dir[1]<0.0 ? b.max[1] : b.min[1], _
          r.dir[2]<0.0 ? b.max[2] : b.min[2]]
    w1 = [r.dir[0]<0.0 ? b.min[0] : b.max[0], _
          r.dir[1]<0.0 ? b.min[1] : b.max[1], _
          r.dir[2]<0.0 ? b.min[2] : b.max[2]]               
    ls=line_segment(near:=[0.,0.,0.],far:=[0.,0.,0.],near_dim:=0,is_intersecting:=false)
    
    
    tmin = (v1 - r.src)./r.dir
    tmax = (w1 - r.src)./r.dir
    
    if tmin[0]>tmax[1] || tmin[1]>tmax[0]
        return
    endif
    tmin[0] = max(tmin[0],tmin[1])
    tmax[0] = min(tmax[0],tmax[1])
    if tmin[0] > tmax[2] || tmin[2] > tmax[0]
        return
    endif
    tmin[0] = max(tmin[0],tmin[2])
    tmax[0] = min(tmax[0],tmax[2])
    
    ls.near_dim = tmin[0] == tmin[1] ? 1 : tmin[0] == tmin[2] ? 2 : 0        
    ls.near = r.src + tmin[0] * r.dir            
    ls.far  = r.src + tmax[0] * r.dir + 0
    ls.is_intersecting = true
end
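
% Example (illustrative sketch, not part of the original code): intersecting a
% ray with the unit cube. The helper function below is an added demonstration;
% it assumes that, as usual in Quasar, __device__ functions can also be called
% from host code (otherwise, wrap the call in a small __kernel__ function).
function [] = intersectswith_example()
    b = box(min:=[0.0,0.0,0.0], max:=[1.0,1.0,1.0])
    r = ray(src:=[-1.0,-1.0,-1.0], dir:=[1.0,1.0,1.0]/sqrt(3.0))
    ls = intersectswith(b, r)
    % expected: ls.is_intersecting == true, ls.near == [0,0,0], ls.far == [1,1,1]
    print "intersecting: ", ls.is_intersecting, ", near: ", ls.near, ", far: ", ls.far
end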

% Function: ray_rotate_yz
%
% Rotation of a ray in the YZ-plane
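%
% The ray direction is rotated in the YZ-plane,
%   y' = y*cos(theta) - z*sin(theta)
%   z' = y*sin(theta) + z*cos(theta)
% and the ray source is rotated by the same angle about the point p.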
function [r_out : ray] = __device__ ray_rotate_yz(r : ray, p : vec3, theta : scalar)
    [cs,sn] = [cos(theta),sin(theta)]
    ray_dir = [r.dir[0], _
               r.dir[1] * cs - r.dir[2] * sn, _               
               r.dir[1] * sn + r.dir[2] * cs]
    ray_src = r.src - p
    ray_src = p + [ray_src[0], _
                   ray_src[1] * cs - ray_src[2] * sn, _
                   ray_src[1] * sn + ray_src[2] * cs+0]
    r_out = ray(src:=ray_src, dir:=ray_dir)
end

% Function: ray_scale
%
% 3D scaling of a ray
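% (the direction vector is re-normalized afterwards, because an anisotropic
% scale changes its length)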
function [r_out : ray] = __device__ ray_scale(r : ray, scale : vec3)
    ray_dir = r.dir .* scale
    ray_dir = ray_dir / sqrt(sum(ray_dir.^2.0))
    r_out = ray(src:=r.src .* scale, dir:=ray_dir)
end

% Function: raytracer_trilinear
%
% Volumetric ray tracer implementation, using trilinear interpolation
% Note: we use CUDA hardware textures to speed things up
%
% Parameters:
%   im_out - the output image
%   voxels - a 3D cube with the voxel data
%   voxelsize - the size of one individual voxel
%   camera_pos - the 3D position of the camera
%   scale - a scaling factor used for rendering
%   theta - the rotation angle in the YZ-plane
%   pos - the output pixel position (supplied automatically by parallel_do)
function [] = __kernel__ raytracer_trilinear( _
    im_out : mat'unchecked, _
    voxels : cube'hwtex_linear'safe, _
    voxelsize : vec3, _
    camera_pos : vec3, _
    scale : scalar, _
    theta: scalar, _
    pos : ivec2)
    
    % Compute the ray direction and origin
    p = (float(pos) - 0.5*float(size(im_out))) * scale
    ray_dir = [p[0],p[1],0]-camera_pos
    ray_dir = ray_dir / sqrt(sum(ray_dir.^2.0)) % normalization - needed for lighting
    
    % Ray trace the main cube
    ray = ray(src:=camera_pos, dir:=ray_dir)
    offset = int([0.5*size(voxels,0), 0.5*size(voxels,1), 0].*voxelsize)
    ray = ray_rotate_yz(ray, [0.0, 0.0, 0.5*size(voxels,2).*voxelsize[2]], theta)
    ray = ray_scale(ray, 1./voxelsize)
    
    data_cube = box(min:=-float(offset), max:=float(size(voxels)-offset))
    is = intersectswith(data_cube, ray)    
                
    % The ray does not intersect the data cube...
    if !is.is_intersecting
        im_out[pos] = 0.0
        return
    endif   

    % The light direction    
    light_dir = [-1.0,1.0,-1.0]/3         

    % Render the surface    
    i = 0
    data_cube = box(min:=data_cube.min-1.0, max:=data_cube.max+1.0)
    dst_val = 0.0
    dst_alpha = 0.0
    step_size = 0.5
    gradient = [1.,1.,1.]
    
    % number of ray marching steps
    steps = int(0.333 * sum(abs((is.far - is.near) ./ ray.dir))/step_size)
       
    for i=0..steps
        q = is.near + step_size * (i)*ray.dir
        qo = q + offset
        src_alpha = voxels[qo]
        
        % compute the gradient
        new_gradient = [voxels[qo+[1.,0.,0.]],voxels[qo+[0.,1.,0.]],voxels[qo+[0.,0.,1.]]] - src_alpha
        norm = dotprod(new_gradient,new_gradient)
        if norm > 0.01
            gradient += 0.1 * (new_gradient / sqrt(norm) - gradient)
        endif
        
        % lighting
        src_val = (1.0 + 1.0 * dotprod(light_dir, float(gradient))) * src_alpha
        
        % compositing
        %dst_val   += src_val * src_alpha
        %dst_alpha += src_alpha
        dst_val ^^= src_val
        dst_alpha = 1.0
        %if (1 - dst_alpha) < 1e-4 % epsilon
        %    break
        %endif
    end
    im_out[pos] = dst_val * dst_alpha

end

% Function: raytracer_trilinear_coloroverlay
%
% Volumetric ray tracer implementation, using trilinear interpolation
% and with a color overlay
% Note: we use CUDA hardware textures to speed things up
%
% Parameters:
%   im_out - the output image
%   voxels - a 3D cube with the voxel data
%   voxels_overlay - a 3D cube with the overlay data
%   voxelsize - the size of one individual voxel
%   camera_pos - the 3D position of the camera
%   scale - a scaling factor used for rendering
%   theta - the rotation angle in the YZ-plane
%   translation - a translation vector (currently not used by the kernel)
%   overlay_color - the color used for the overlay
%   pos - the output pixel position (supplied automatically by parallel_do)
function [] = __kernel__ raytracer_trilinear_coloroverlay( _
    im_out : cube'unchecked, _
    voxels : cube'hwtex_linear'safe, _
    voxels_overlay : cube'hwtex_linear'safe, _
    voxelsize : vec3, _
    camera_pos : vec3, _
    scale : scalar, _
    theta: scalar, _
    translation : vec3, _
    overlay_color : vec3, _
    pos : ivec2)
    
    % Compute the ray direction and origin
    p = (float(pos) - 0.5*float(size(im_out,0..1))) * scale
    ray_dir = [p[0],p[1],0]-camera_pos
    ray_dir = ray_dir / sqrt(sum(ray_dir.^2.0)) % normalization - needed for lighting
    
    % Ray trace the main cube
    ray = ray(src:=camera_pos, dir:=ray_dir)
    offset = int([0.5*size(voxels,0), 0.5*size(voxels,1), 0].*voxelsize)
    ray = ray_rotate_yz(ray, [0.0, 0.0, 0.5*size(voxels,2).*voxelsize[2]], theta)
    ray = ray_scale(ray, 1./voxelsize)
    
    data_cube = box(min:=-float(offset), max:=float(size(voxels)-offset))
    is = intersectswith(data_cube, ray)    
                
    % The ray does not intersect the data cube...
    if !is.is_intersecting
        im_out[pos[0],pos[1],0..2] = 0.0
        return
    endif   

    % The light direction    
    light_dir = [-1.0,1.0,-1.0]/3         

    % Render the surface    
    i = 0
    data_cube = box(min:=data_cube.min-1.0, max:=data_cube.max+1.0)
    gray_val = 0.0
    dst_val = [0.0,0.0,0.0]
    dst_alpha = 0.0
    step_size = 0.5
    gradient = [1.,1.,1.]
    
    % number of ray marching steps
    steps = int(0.333 * sum(abs((is.far - is.near) ./ ray.dir))/step_size)
       
    for i=0..steps
        q = is.near + step_size * (i)*ray.dir
        qo = q + offset
        src_alpha = voxels[qo]
        
        % compute the gradient
        new_gradient = [voxels[qo+[1.,0.,0.]],voxels[qo+[0.,1.,0.]],voxels[qo+[0.,0.,1.]]] - src_alpha
        norm = dotprod(new_gradient,new_gradient)
        if norm > 0.01
            gradient += 0.1 * (new_gradient / sqrt(norm) - gradient)
        endif            
        
        % lighting 
        src_val = (1.0 + 1.0 * dotprod(light_dir, float(gradient))) * src_alpha
        
        % compositing  
        gray_val ^^= src_val  
        %dst_val   += src_val * src_alpha
        %dst_alpha += src_alpha
        
        % overlay
        beta = voxels_overlay[qo] * src_alpha
        dst_val   += overlay_color  * beta
        dst_alpha += beta        
    end
    im_out[pos[0],pos[1],0..2] = max(dst_val * dst_alpha, [gray_val,gray_val,gray_val])

end

% Function: voxelfilter
%
% A small 3D smoothing filter that sums 15 neighboring voxels (the result is
% not normalized), used to speed up the volumetric rendering
%
% Parameters:
%   voxelsf - the filtered 3D volume
%   voxels - the original 3D volume
%   pos - the voxel position (supplied automatically by parallel_do)
%
function [] = __kernel__ voxelfilter(voxelsf : cube'unchecked, voxels : cube'hwtex_nearest, pos : ivec3)
    voxelsf[pos] = voxels[pos]+voxels[pos+[0,0,1]]+voxels[pos+[0,0,-1]]+ _
                   voxels[pos+[0,1,0]]+voxels[pos+[0,1,1]]+voxels[pos+[0,-1,0]]+voxels[pos+[0,-1,-1]] + _
                   voxels[pos+[1,0,0]]+voxels[pos+[1,0,1]]+voxels[pos+[-1,0,0]]+voxels[pos+[-1,0,-1]] + _
                   voxels[pos+[1,1,0]]+voxels[pos+[1,1,1]]+voxels[pos+[-1,-1,0]]+voxels[pos+[-1,-1,-1]]
end
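
% Example (illustrative sketch, not part of the original code): applying the
% filter to an entire volume via parallel_do. Note that the kernel above sums
% the neighboring voxels without normalizing the result.
function voxelsf = voxelfilter_apply(voxels : cube)
    voxelsf = zeros(size(voxels))
    parallel_do(size(voxels), voxelsf, voxels, voxelfilter)
end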

% Function: volumeraycasting
%
% Real-time visualization of 3D volumes (the main function)
%
% Parameters:
%   voxels - the 3D volume to visualize
%   voxelsize - the size of an individual voxel (isotropic voxels: use [1,1,1])
function [] = volumeraycasting(voxels, voxelsize = [1,1,1])

    hold("off")
    im_out = zeros(512,768)
    scale = 1.0
    camera_pos = [150,0,-450] 
    theta = 0.0   
    
    repeat    
        theta = theta + 0.01  
        parallel_do(size(im_out,0..1),im_out,voxels,voxelsize, camera_pos,scale,theta,raytracer_trilinear)
        
        fig = imshow(im_out,[0,1])
        
        if fig != null
            fig.enable_3dcontrol()
            theta = fig.theta
            scale = fig.scale
        endif
           
    until !hold("on")
    
end

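% Function: volumeraycasting_sidebyside
%
% Renders two 3D volumes side by side in real time, while recording the
% rendered frames to an MPEG-4 video file (recording stops after roughly
% 500 frames)
%
% Parameters:
%   voxelsA - the first 3D volume to visualize
%   voxelsB - the second 3D volume to visualize
%   output_filename - the name of the output video file (".mp4" is appended)
%   voxelsize - the size of an individual voxel (isotropic voxels: use [1,1,1])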
function [] = volumeraycasting_sidebyside(voxelsA, voxelsB, output_filename, voxelsize = [1,1,1])

    hold("off")
    im_outA = zeros(512,512)
    im_outB = zeros(512,512)
    im_out = zeros(512,1024)
    scale = 1.0
    camera_pos = [150,0,-450] 
    theta = 0.0   
    theta0 = 0.0     
    frame_index = 0
    
    codec_settings = object()
    codec_settings.encoder = "mpeg4" % video encoder (set explicitly rather than guessed from the file name)
    codec_settings.video_bit_rate = 20000000 % bits per second
    codec_settings.frame_width = size(im_out,1)
    codec_settings.frame_height = size(im_out,0)
    codec_settings.avg_frame_rate = 20 % frames per second
    codec_settings.gop_size = 10 % periodicity of the I-frames
    codec_settings.bits_per_color = 8
    out_stream = vidsave(strcat(output_filename, ".mp4"), codec_settings)
    
    repeat    
        theta0 = theta0 + 0.01  
        frame_index += 1
        parallel_do(size(im_outA,0..1),im_outA,voxelsA,voxelsize, camera_pos,scale,theta,raytracer_trilinear)
        parallel_do(size(im_outB,0..1),im_outB,voxelsB,voxelsize, camera_pos,scale,theta,raytracer_trilinear)
        im_out[:,0..size(im_outA,1)-1] = clamp(im_outA, 1)
        im_out[:,size(im_outA,1)..2*size(im_outA,1)-1] = clamp(im_outB, 1)
        fig = imshow(im_out,[0,1])
        vidwriteframe(out_stream, frame_index / codec_settings.avg_frame_rate, float_to_uint8(repmat(im_out*255,[1,1,3])))
        
        if fig != null
            fig.enable_3dcontrol()
            theta = fig.theta + theta0
            scale = fig.scale
        endif
           
    until !hold("on") || frame_index > 500
    
end

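% Function: volumeraycasting_overlay
%
% Real-time visualization of a 3D volume with a second volume rendered on
% top of it as a colored overlay
%
% Parameters:
%   voxels - the 3D volume to visualize
%   voxels_overlay - the 3D overlay volume
%   voxelsize - the size of an individual voxel (isotropic voxels: use [1,1,1])
%   overlay_color - the color used for the overlay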
function [] = volumeraycasting_overlay(voxels : cube, voxels_overlay : cube, voxelsize = [1,1,1], overlay_color = [0.0,1.0,0.0])

    % calculate the intensity-weighted center of the volume
    sz = size(voxels,0..2)
    ctr = zeros(numel(sz))
    ctrs = 0.0
    for m=0..sz[0]-1
        for n=0..sz[1]-1
            for k=0..sz[2]-1
                val = voxels[m,n,k]
                ctr[0] += val .* m
                ctr[1] += val .* n
                ctr[2] += val .* k
                ctrs += val
            end
        end
    end
    
    translation = ctr/ctrs - sz/2

    hold("off")
    im_out = zeros(512*2,768*2,3)
    scale = 0.125
    camera_pos = [200,0,-200] 
    theta = 0.0   
    
    repeat    
        theta = theta + 0.01  
        parallel_do(size(im_out,0..1),im_out,voxels,voxels_overlay,voxelsize, camera_pos,scale,theta,translation,overlay_color,raytracer_trilinear_coloroverlay)
        
        imshow(im_out,[0,1])
        %fig = imshow(im_out,[0,1])
        
        %if fig != null
        %    fig.enable_3dcontrol()
        %    %theta = fig.theta
        %    scale = 0.5 * fig.scale
        %endif
           
    until !hold("on")
    
end

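% Function: render_volumeraycasting_overlay
%
% Renders a single frame of the color overlay visualization for a given
% rotation angle and returns it as an image (non-interactive)
%
% Parameters:
%   voxels - the 3D volume to visualize
%   voxels_overlay - the 3D overlay volume
%   voxelsize - the size of an individual voxel (isotropic voxels: use [1,1,1])
%   overlay_color - the color used for the overlay
%   theta - the rotation angle in the YZ-plane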
function im_out = render_volumeraycasting_overlay(voxels : cube, voxels_overlay : cube, voxelsize = [1,1,1], overlay_color = [0.0,1.0,0.0], theta = 0.0)

    % calculate the intensity-weighted center of the volume
    sz = size(voxels,0..2)
    ctr = zeros(numel(sz))
    ctrs = 0.0
    for m=0..sz[0]-1
        for n=0..sz[1]-1
            for k=0..sz[2]-1
                val = voxels[m,n,k]
                ctr[0] += val .* m
                ctr[1] += val .* n
                ctr[2] += val .* k
                ctrs += val
            end
        end
    end
    
    translation = ctr/ctrs - sz/2

    hold("off")
    im_out = zeros(512*2,768*2,3)
    scale = 0.125
    camera_pos = [200,0,-200] 
    
    parallel_do(size(im_out,0..1),im_out,voxels,voxels_overlay,voxelsize, camera_pos,scale,theta,translation,overlay_color,raytracer_trilinear_coloroverlay)
            
end
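
% Example (illustrative sketch, not part of the original code): rendering a
% synthetic test volume. The function name `main` and the sphere volume are
% illustrative additions only; in practice the voxel data would come from a
% DICOM data set (LibDICOM.dll is imported above, but its loading API is not
% shown here).
function [] = main()
    % build a 64^3 test volume containing a filled sphere
    N = 64
    radius = N/4
    voxels = zeros(N,N,N)
    for m=0..N-1
        for n=0..N-1
            for k=0..N-1
                dx = m - N/2
                dy = n - N/2
                dz = k - N/2
                voxels[m,n,k] = dx*dx + dy*dy + dz*dz < radius*radius ? 1.0 : 0.0
            end
        end
    end
    
    % interactive visualization with isotropic voxels
    volumeraycasting(voxels, [1,1,1])
end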