Quasar > Medical image Processing > CT reconstruction > forward_fanbeam_projection
%==============================================================================
% fanbeam.q
% Some experimental routines for 2D fan-beam CT processing and 
% reconstruction
% 2012, Bart Goossens, Ghent Univ./TELIN-IPI-iMinds
%==============================================================================

{!author name="Bart Goossens"}
{!doc category="Medical image Processing/CT reconstruction"}

% Computation of the peak-signal-to-noise ratio
import "system.q"
import "parallelbeam.q"

psnr = (x, y) -> 10*log10(255^2/mean((x-y).^2))

% Function: forward_fanbeam_projection
%
% ray-based forward projection using nearest neighbor/linear interpolation
% (image domain -> sinogram domain). A naive implementation.
%
% :function y = forward_fanbeam_projection(x, fangeom)
%
% Notes:
% this approach is known to produce some high frequency artifacts
% although it may be still useful as a simple testing tool.
% This ray-based algorithm is very simple - but is very fast.
%
% Parameters:
% x - input image 
% fangeom - fanbeam geometry
%
% Example:
% :fangeom = object()
% :fangeom.distsrcdet = 291.20 % distance source-detector
% :fangeom.distsrcobj = 115.84 % distance source-object
% :fangeom.voxelpitch = 0.085
% :fangeom.detoffset = -1.5
% :fangeom.dimx = 560 % number of detector elements
% :fangeom.pixelpitch = 0.20 % distance between the detector elements
% :fangeom.angles = linspace(0,2*pi,512)
function y = forward_fanbeam_projection(x, fangeom)

    % y = sinogram space, x = image space
    function [] = __kernel__ process(x : mat'unchecked, y : mat'unchecked, _
        angles : vec, angles2 : vec, pos : ivec2, _
        voxelpitch : scalar, distsrcdet : scalar, distsrcobj : scalar)

        theta = angles[pos[1]] % angle of the source
        d = -angles2[pos[0]] % angle of the beam
        inv_voxelpitch = 1.0/voxelpitch
        alpha = d+theta % the angle of the ray
        v = -[cos(alpha), sin(alpha)]
        srcpos = float(size(x)) * 0.5 + inv_voxelpitch * distsrcobj * [cos(theta), sin(theta)] % position of the source

        % compute the intersection of the ray with the field of view       
        lambda1 = ([size(x,0)-2.0, 1.0]-srcpos[0])/v[0]
        lambda2 = ([size(x,1)-2.0, 1.0]-srcpos[1])/v[1]
        lambda = [max(min(lambda1),min(lambda2)),min(max(lambda1),max(lambda2))]

        sum = 0.0

        for i=lambda[0]..lambda[1]
            p = srcpos + i * v   
            sum += x[p]   % nearest neighbor interpolation

            % linear interpolation
            %f=floor(p)
            %a=frac(p)
            %sum += x[f]*(1.0-a[0])*(1.0-a[1]) + _
            %       x[f+[0,1]]*(1.0-a[0])*a[1] + _
            %       x[f+[1,0]]*a[0]*(1.0-a[1]) + _
            %       x[f+[1,1]]*a[0]*a[1]
        end
        
        y[pos] = sum
    end

    angles2 = atan(fangeom.pixelpitch * (fangeom.detoffset - _
        0.5*fangeom.dimx + (0..fangeom.dimx-1))/fangeom.distsrcdet)
    y = zeros(fangeom.dimx,numel(fangeom.angles))

    parallel_do(size(y),x,y,fangeom.angles,angles2,fangeom.voxelpitch, _
        fangeom.distsrcdet, fangeom.distsrcobj, process)
end

% Function: adjoint_fanbeam_projection
%
% adjoint ray-based fanbeam projection using nearest neighbor/linear interpolation. 
% (sinogram domain -> image domain).
%
% :function y = adjoint_fanbeam_projection(x, fangeom, sz)
%
% Notes:
% The adjoint is not the same as the backward projection, however a reconstruction can be
% obtained using e.g. the conjugate gradient algorithm.
%
% Parameters:
% x - sinogram data
% fangeom - fanbeam geometry (see <forward_fanbeam_projection>)
% sz - size of the output image
function y = adjoint_fanbeam_projection(x, fangeom, sz)

    % x = sinogram space, y = image space
    function [] = __kernel__ process(x : mat'unchecked, y : mat'unchecked, _
        angles : vec, angles2 : vec, pos : ivec2, _
        voxelpitch : scalar, distsrcdet : scalar, distsrcobj : scalar)

        theta = angles[pos[1]] % angle of the source
        d = -angles2[pos[0]] % angle of the beam
        inv_voxelpitch = 1.0/voxelpitch        
        alpha = d+theta % the angle of the ray
        v = -[cos(alpha), sin(alpha)]
        srcpos = float(size(y)) * 0.5 + inv_voxelpitch * distsrcobj * _
            [cos(theta), sin(theta)] % position of the source

        % compute the intersection of the ray with the field of view
        lambda1 = ([size(y,0)-2.0, 1.0]-srcpos[0])/v[0]
        lambda2 = ([size(y,1)-2.0, 1.0]-srcpos[1])/v[1]
        lambda = [max(min(lambda1),min(lambda2)),min(max(lambda1),max(lambda2))]

        sum = x[pos]

        for i=lambda[0]..lambda[1]
            p = int(srcpos + i * v)
            y[p] += sum  % nearest neighbor interpolation
            % linear interpolation
            %f=floor(p)
            %a=frac(p)
            %y[f] += (1.0-a[0])*(1.0-a[1])*sum
            %y[f+[0,1]] += (1.0-a[0])*a[1]*sum
            %y[f+[1,0]] += a[0]*(1.0-a[1])*sum
            %y[f+[1,1]] += a[0]*a[1]*sum
        end
    end

    % Precompute the angles of the source to the detector array elements
    angles2 = atan(fangeom.pixelpitch * (fangeom.detoffset - 0.5*fangeom.dimx + (0..fangeom.dimx-1))/fangeom.distsrcdet)
    y = zeros(sz)

    parallel_do(size(x),x,y,fangeom.angles,angles2,fangeom.voxelpitch, fangeom.distsrcdet, fangeom.distsrcobj, process)
end

% Function: forward_dd_fanbeam_projection
%
% Ray-based forward Distance-Driven projection: a reimplementation
% of the Distance-Driven algorithm of De Man, although with a simplification:
% (image domain -> sinogram domain)
% One first important detail is the projection normalization (because the sum of 
% the weights of the rays that go through a point is not necessarily 1).
% Here, we perform the normalization directly in the image domain, prior to
% the forward projection. We can call this a PRE-normalization.
% Correspondingly, in the adjoint projector <adjoint_dd_fanbeam_projection>, 
% the normalization is performed in image domain, AFTER the projection.
%
% A second detail is that the projection axis used to calculate the weights,
% is the detector array itself (some implementations use the horizontal or
% vertical axes), assuming circular pixels.
%
% There is another subtle detail, in some other distance driven implementation,
% the normalization weights are accumulated per projection angle, resulting
% in a cube of weights of size(im,0)*size(im,1)*numel(fangeom.angles) - which
% practically corresponds to a large amount of memory that needs to be pre-
% allocated. Here, we simply use the sum of these weights (sum(weights,2)).
% It is not completely clear to me how this choice affects the image quality.
% However, it is sure that the approach here is numerically more stable because
% of the larger weights (obtained after summing).
%
% The normalization is the source of possible numerical accuracy errors, 
% especially when the normalization values are very small, and in case of
% ill-posed reconstruction (e.g. when either the number of detector samples is
% too small, or the number of projection angles). This effect is even 
% strengthened by the fact that order of the parallel computations is not 
% guaranteed in the adjoint projection. One possible improvement would be 
% to incorporate Kahan's summing algorithm. It may also be useful to 
% compare single precision accuracy vs. double precision accurary (the command
% line option -double in Quasar).
%
% :function y = forward_dd_fanbeam_projection(x, ni, fangeom)
%
% Parameters:
% x - input image (image domain)
% ni - normalization image: a matrix with pre-computed normalization values.
% fangeom - fanbeam geometry
function y = forward_dd_fanbeam_projection(x, ni, fangeom)

    % x = image space, y = sinogram space, ni = normalization image
    function [] = __kernel__ process(x : mat'unchecked, y : mat'unchecked, angles : vec'unchecked, _
        voxelpitch : scalar, distsrcdet : scalar, distsrcobj : scalar, pixelpitch : scalar, detoffset : scalar, _
        pos : ivec2, blkpos : ivec2, blkdim : ivec2)

        theta = angles[pos[0]] % the projection angle
        ray_dir = [cos(theta), sin(theta)] % direction of the ray
        srcpos = float(size(x)) * 0.5 + distsrcobj * ray_dir / voxelpitch % position of the source
        srcpos_orth = [-srcpos[1], srcpos[0]]
        detarraydir = [-sin(theta), cos(theta)] * pixelpitch / voxelpitch % the direction of the detector array
        % position of the detector array, corrected with the detector offset
        detarraypos = float(size(x)) * 0.5 + (distsrcobj - distsrcdet) * ray_dir / voxelpitch + _
            (detoffset - 0.5*size(y,0)) * detarraydir
        halfpixelbound = [-sin(theta), cos(theta)]

        % compute the thread index (assuming that there are 32 warps, and that each row is
        % processed sequentially)
        nthreads = 4
        thread_idx = mod(blkpos[1],nthreads)
        dims = size(y,0)
        assert(dims<=1024 && nthreads<=4)
        % allocate shared memory for this thread
        y_val = shared(dims,nthreads)
        % initialize
        for i=0..size(y,0)-1
            y_val[i, thread_idx] = 0
        end
        syncthreads

        for m=0..size(x,0)-1
            p = [m, pos[1]]   % the position of the pixel we are considering 

            % project from srcpos through p onto the detector array
            p1 = float(p) + 0.5
            p2 = p1 - halfpixelbound % left boundary
            p3 = p1 + halfpixelbound % right boundary
            smp2 = srcpos_orth - [-p2[1], p2[0]] % orthogonal of (s-p)
            smp3 = srcpos_orth - [-p3[1], p3[0]]
            
            % compute the detector positions
            lambda2 = dotprod(smp2, p2 - detarraypos) / dotprod(smp2, detarraydir)
            lambda3 = dotprod(smp3, p3 - detarraypos) / dotprod(smp3, detarraydir)

            det_x = min(int(lambda2),int(lambda3))
            [min1, max1] = [min(lambda2,lambda3), max(lambda2,lambda3)]
            while det_x >= 0 && det_x < size(y,0) && det_x < lambda3
                [min2, max2] = [float(det_x), float(det_x+1)]

                % compute the overlap with the detector element - we exploit the fact
                % that we know that there IS an overlap, which gives a simple formula
                overlap = min(max1, max2) - max(min1, min2)

                % note that the normalization has already been done here...
                y_val[det_x, thread_idx] += overlap * x[p]
                det_x += 1
            end

        end
        syncthreads
        % now we perform a reduction to compute the final values
        if blkdim[1] > 1
            bit = 1
            while bit < nthreads
                if mod(thread_idx,bit*2) == 0 && blkpos[1] < nthreads
                    for i=0..size(y,0)-1
                        y_val[i, thread_idx] = y_val[i, thread_idx] + y_val[i,thread_idx + bit]
                    end
                endif
                syncthreads
                bit *= 2
            end

            % write the results to the output - we again use some parallellization for this
            nblocks = int((size(y,0) + blkdim[1] - 1) / blkdim[1])
            for block=0..nblocks-1
                i = block * blkdim[1] + blkpos[1]
                if i < size(y,0)
                    y[i, pos[0]] = y_val[i, 0]
                endif
            end
        else
            for i=0..size(y,0)-1
                y[i, pos[0]] = y_val[i, 0]
            end
        endif        
    end

    y = zeros(fangeom.dimx,numel(fangeom.angles))    
    blksize = max_block_size(process,[1,size(x,1),1]) % note: block size is important to avoid bank conflicts
    

    parallel_do([[numel(fangeom.angles),size(x,1),1],blksize],x./ni,y,fangeom.angles,fangeom.voxelpitch, fangeom.distsrcdet, _
        fangeom.distsrcobj, fangeom.pixelpitch, fangeom.detoffset, process)

end

% Function: adjoint_dd_fanbeam_projection
%
% Adjoint Ray-based forward Distance-Driven projection
% (sinogram domain -> image domain)
% 
% :function [y, ni] = adjoint_dd_fanbeam_projection(x, fangeom, sz)
%
% Parameters:
% x - input sinogram data
% ni - normalization image, which is computed by this function.
% sz - size of the output image
% fangeom - fanbeam geometry
%
% See also:
%   <forward_dd_fanbeam_projection>
function [y, ni] = adjoint_dd_fanbeam_projection(x, fangeom, sz)

    % x = sinogram space, y = image space, ni = normalization image
    function [] = __kernel__ process(x : mat'unchecked, y : mat'unchecked, ni : mat'unchecked, angles : vec'unchecked, _
        voxelpitch : scalar, distsrcdet : scalar, distsrcobj : scalar, pixelpitch : scalar, detoffset : scalar, _
        pos : ivec2, blkpos : ivec2, blkdim : ivec2)

        theta = angles[pos[0]] % the projection angle
        ray_dir = [cos(theta), sin(theta)] % direction of the ray
        srcpos = float(size(y)) * 0.5 + distsrcobj * ray_dir / voxelpitch % position of the source
        srcpos_orth = [-srcpos[1], srcpos[0]]
        detarraydir = [-sin(theta), cos(theta)] * pixelpitch / voxelpitch % the direction of the detector array
        % position of the detector array, corrected with the detector offset
        detarraypos = float(size(y)) * 0.5 + (distsrcobj - distsrcdet) * ray_dir / voxelpitch + _
            (detoffset - 0.5*size(x,0)) * detarraydir
        halfpixelbound = [-sin(theta), cos(theta)]

        for m=0..size(y,0)-1
            p = [m, pos[1]]   % the position of the pixel we are considering

            % project from srcpos through p onto the detector array
            p1 = float(p) + 0.5
            p2 = p1 - halfpixelbound % left boundary
            p3 = p1 + halfpixelbound % right boundary
            smp2 = srcpos_orth - [-p2[1], p2[0]] % orthogonal of (s-p)
            smp3 = srcpos_orth - [-p3[1], p3[0]]

            % compute the detector positions
            lambda2 = dotprod(smp2, p2 - detarraypos) / dotprod(smp2, detarraydir)
            lambda3 = dotprod(smp3, p3 - detarraypos) / dotprod(smp3, detarraydir)

            det_x = min(int(lambda2),int(lambda3))

            [min1, max1] = [min(lambda2,lambda3), max(lambda2,lambda3)]
            while det_x >= 0 && det_x < size(x,0) && det_x < lambda3
                [min2, max2] = [float(det_x), float(det_x+1)]

                % compute the overlap with the detector element - we exploit the fact
                % that we know that there IS an overlap, which gives a simple formula
                overlap = min(max1, max2) - max(min1, min2)

                % note that the normalization has already been done here...
                y[p] += overlap * x[det_x, pos[0]]
                ni[p] += overlap
                det_x += 1
            end
        end
    end

    y = zeros(sz)
    ni = zeros(sz)
    % choose the block size so that we are free of bank conflicts
    blksize = max_block_size(process,[1,sz[1],1])
    parallel_do([[numel(fangeom.angles),sz[1],1],blksize],x,y,ni,fangeom.angles,fangeom.voxelpitch, fangeom.distsrcdet, _
        fangeom.distsrcobj, fangeom.pixelpitch, fangeom.detoffset, process)
    ni = ni .^ 4
    % apply the normalization
    y = y ./ ni
end

% Function: cg_fanbeam_reconstruction
%
% Reconstruction using conjugate gradients - naive forward and adjoint projections
% <forward_fanbeam_projection> and <adjoint_fanbeam_projection>
%
% Parameters:
% x - input sinogram data
% ni - normalization image, which is computed by this function.
% sz - size of the output image
% fangeom - fanbeam geometry
% 
function x = cg_fanbeam_reconstruction(y, fangeom, sz)

    W   = v -> forward_fanbeam_projection(v, fangeom)
    W_H = v -> adjoint_fanbeam_projection(v, fangeom, sz)
    dotprod = (v, w) -> sum(sum(v .* w))

    x = 0
    r = W_H(y) % residual
    p = r
    new_err = dotprod(r, r)
    tol = 1e-8

    for it=1..20 % conjugate gradients    
        err=new_err
        print "iteration ",it," err=",err
        A_p = W_H(W(p))
        alpha = err / dotprod(p, A_p)
        x = x + alpha * p
        r = r - alpha * A_p
        new_err = dotprod(r, r)
        if (new_err/numel(x) < tol)
            break
        endif
        beta = new_err / err
        p = r + beta * p
    end
end

% Function: cg_dd_fanbeam_reconstruction
%
% Reconstruction using conjugate gradients - distance driven projections
% <forward_fanbeam_projection> and <adjoint_fanbeam_projection>
%
% :function x = cg_dd_fanbeam_reconstruction(y, fangeom, sz)
%
% Parameters:
% x - input sinogram data
% ni - normalization image, which is computed by this function.
% sz - size of the output image
% fangeom - fanbeam geometry
% 
function x = cg_dd_fanbeam_reconstruction(y, fangeom, sz)

    dotprod = (v, w) -> sum(sum(v .* w))

    x = 0
    [r,ni] = adjoint_dd_fanbeam_projection(y, fangeom, sz) % residual
    p = r
    new_err = dotprod(r, r)
    tol = 1e-10

    for it=1..20 % conjugate gradients    
        err=new_err
        print "iteration ",it," err=",err
        A_p = forward_dd_fanbeam_projection(p, ni, fangeom)
        [A_p,ni] = adjoint_dd_fanbeam_projection(A_p, fangeom, sz)
        alpha = err / dotprod(p, A_p)
        x = x + alpha * p
        r = r - alpha * A_p
        new_err = dotprod(r, r)
        if (new_err/numel(x) < tol)
            break
        endif
        beta = new_err / err
        p = r + beta * p
    end
end

% Function: fan2para
% 
% Regridding from fan-beam to parallel-beam. This is useful for comparing to 
% the FBP reconstruction.
%
% :function y = fan2para(x, fangeom, parageom)
%
% Parameters:
% x - input fanbeam sinogram
% y - output parallelbeam sinogram
% fangeom - fanbeam geometry
% parageom - parallelbeam geometry
%
function y = fan2para(x, fangeom, parageom)

    interp_nearest : [__device__ (mat'circular, vec2) -> scalar]
    interp_nearest = __device__ (x : mat'circular, pos : vec2) -> x[pos]

    interp_linear : [__device__ (mat'circular, vec2) -> scalar]
    interp_linear = __device__ (x  : mat'circular, pos : vec2) -> _
                (f = frac(pos); q = int(pos); _
                 (1-f[0])*(1-f[1])*x[q] + _
                 f[0]*(1-f[1])*x[q+[1,0]] + _
                 (1-f[0])*f[1]*x[q+[0,1]] + _
                 f[0]*f[1]*x[q+[1,1]])

    function [] = __kernel__ fan2para_process(x : mat'circular, y : mat'unchecked, _
        para_angles : vec, para_offsets : vec, _
        fan_angles : vec, fan_angles2 : vec, distsrcobj : scalar, _
        interp : [__device__ (mat, vec2) -> scalar], pos : vec2)

        d = para_offsets[pos[0]] % offset along the detector array
        theta = para_angles[pos[1]] % angle

        alpha_prime = asin(d / distsrcobj)
        theta_prime = mod(2.0*pi + theta + alpha_prime, 2.0*pi)

        % compute indices
        n = numel(fan_angles2)
        alpha_idx = (alpha_prime-fan_angles2[0])*float(n-1)/(fan_angles2[n-1]-fan_angles2[0])
        % approximation - note: since the angles are not necessarily uniformly spaced
        % we may need to do a binary search here...
        n = numel(fan_angles)
        theta_idx = (theta_prime-fan_angles[0])*float(n-1)/(fan_angles[n-1]-fan_angles[0])

        % linear interpolation
        y[pos] = interp(x, [alpha_idx, theta_idx])
    end

    angles2 = atan(fangeom.pixelpitch * (fangeom.detoffset - 0.5*fangeom.dimx + (0..fangeom.dimx-1))/fangeom.distsrcdet)
    y = zeros(numel(parageom.offsets),numel(parageom.angles))
    parallel_do(size(y), x, y, parageom.angles, parageom.offsets, fangeom.angles, angles2, _
        fangeom.distsrcobj/fangeom.voxelpitch, interp_linear, fan2para_process)

end

function [] = main()

    % fan-beam scanner geometry
    fangeom = object()
    fangeom.distsrcdet = 291.20 % distance source-detector
    fangeom.distsrcobj = 115.84 % distance source-object
    fangeom.voxelpitch = 0.085
    fangeom.detoffset = -1.5
    fangeom.dimx = 560 % number of detector elements
    fangeom.pixelpitch = 0.20 % distance between the detector elements
    fangeom.angles = linspace(0,2*pi,512)

    normalize = x -> (x / max(max(x))) * 255
    im = imread("SheppLogan.png")
    im = im[:,:,0]

    psd = x -> log(1e-4+fftshift2(abs(fft2(x))))
   
    % Compute the sinogram data
    y = forward_dd_fanbeam_projection(im, ones(size(im)), fangeom)

    print "Naive algorithm"
    tic()
    z1 = cg_fanbeam_reconstruction(y, fangeom, size(im))
    toc()
    
    print "Distance driven algorithm"
    tic()
    z2 = cg_dd_fanbeam_reconstruction(y, fangeom, size(im))
    toc()

    print "FBP algorithm"
    parageom = object()
    parageom.angles = linspace(0,2*pi,256)
    parageom.offsets = -255..256
    y_parallel = fan2para(y, fangeom, parageom)
    z3 = fbp_parabeam_reconstruction(y_parallel, parageom, size(im), "hann")

    imshow(y, []); title("Input sinogram")
    imshow(z1, []); title("Reconstructed using the naive algorithm")
    imshow(z2, []); title("Reconstructed using the distance driven algorithm")
    imshow(z3, []); title("Reconstructed using the FBP algorithm")
end