%==============================================================================
% fanbeam.q
% Some experimental routines for 2D fan-beam CT processing and
% reconstruction
% 2012, Bart Goossens, Ghent Univ./TELIN-IPI-iMinds
%==============================================================================
{!author name="Bart Goossens"}
{!doc category="Medical image Processing/CT reconstruction"}
% Computation of the peak-signal-to-noise ratio
import "system.q"
import "parallelbeam.q"
psnr = (x, y) -> 10*log10(255^2/mean((x-y).^2))
% Function: forward_fanbeam_projection
%
% ray-based forward projection using nearest neighbor/linear interpolation
% (image domain -> sinogram domain). A naive implementation.
%
% :function y = forward_fanbeam_projection(x, fangeom)
%
% Notes:
% this approach is known to produce some high frequency artifacts
% although it may be still useful as a simple testing tool.
% This ray-based algorithm is very simple - but is very fast.
%
% Parameters:
% x - input image
% fangeom - fanbeam geometry
%
% Example:
% :fangeom = object()
% :fangeom.distsrcdet = 291.20 % distance source-detector
% :fangeom.distsrcobj = 115.84 % distance source-object
% :fangeom.voxelpitch = 0.085
% :fangeom.detoffset = -1.5
% :fangeom.dimx = 560 % number of detector elements
% :fangeom.pixelpitch = 0.20 % distance between the detector elements
% :fangeom.angles = linspace(0,2*pi,512)
function y = forward_fanbeam_projection(x, fangeom)
% y = sinogram space, x = image space
function [] = __kernel__ process(x : mat'unchecked, y : mat'unchecked, _
angles : vec, angles2 : vec, pos : ivec2, _
voxelpitch : scalar, distsrcdet : scalar, distsrcobj : scalar)
theta = angles[pos[1]] % angle of the source
d = -angles2[pos[0]] % angle of the beam
inv_voxelpitch = 1.0/voxelpitch
alpha = d+theta % the angle of the ray
v = -[cos(alpha), sin(alpha)]
srcpos = float(size(x)) * 0.5 + inv_voxelpitch * distsrcobj * [cos(theta), sin(theta)] % position of the source
% compute the intersection of the ray with the field of view
lambda1 = ([size(x,0)-2.0, 1.0]-srcpos[0])/v[0]
lambda2 = ([size(x,1)-2.0, 1.0]-srcpos[1])/v[1]
lambda = [max(min(lambda1),min(lambda2)),min(max(lambda1),max(lambda2))]
sum = 0.0
for i=lambda[0]..lambda[1]
p = srcpos + i * v
sum += x[p] % nearest neighbor interpolation
% linear interpolation
%f=floor(p)
%a=frac(p)
%sum += x[f]*(1.0-a[0])*(1.0-a[1]) + _
% x[f+[0,1]]*(1.0-a[0])*a[1] + _
% x[f+[1,0]]*a[0]*(1.0-a[1]) + _
% x[f+[1,1]]*a[0]*a[1]
end
y[pos] = sum
end
angles2 = atan(fangeom.pixelpitch * (fangeom.detoffset - _
0.5*fangeom.dimx + (0..fangeom.dimx-1))/fangeom.distsrcdet)
y = zeros(fangeom.dimx,numel(fangeom.angles))
parallel_do(size(y),x,y,fangeom.angles,angles2,fangeom.voxelpitch, _
fangeom.distsrcdet, fangeom.distsrcobj, process)
end
% Function: adjoint_fanbeam_projection
%
% adjoint ray-based fanbeam projection using nearest neighbor/linear interpolation.
% (sinogram domain -> image domain).
%
% :function y = adjoint_fanbeam_projection(x, fangeom, sz)
%
% Notes:
% The adjoint is not the same as the backward projection, however a reconstruction can be
% obtained using e.g. the conjugate gradient algorithm.
%
% Parameters:
% x - sinogram data
% fangeom - fanbeam geometry (see <forward_fanbeam_projection>)
% sz - size of the output image
function y = adjoint_fanbeam_projection(x, fangeom, sz)
% x = sinogram space, y = image space
function [] = __kernel__ process(x : mat'unchecked, y : mat'unchecked, _
angles : vec, angles2 : vec, pos : ivec2, _
voxelpitch : scalar, distsrcdet : scalar, distsrcobj : scalar)
theta = angles[pos[1]] % angle of the source
d = -angles2[pos[0]] % angle of the beam
inv_voxelpitch = 1.0/voxelpitch
alpha = d+theta % the angle of the ray
v = -[cos(alpha), sin(alpha)]
srcpos = float(size(y)) * 0.5 + inv_voxelpitch * distsrcobj * _
[cos(theta), sin(theta)] % position of the source
% compute the intersection of the ray with the field of view
lambda1 = ([size(y,0)-2.0, 1.0]-srcpos[0])/v[0]
lambda2 = ([size(y,1)-2.0, 1.0]-srcpos[1])/v[1]
lambda = [max(min(lambda1),min(lambda2)),min(max(lambda1),max(lambda2))]
sum = x[pos]
for i=lambda[0]..lambda[1]
p = int(srcpos + i * v)
y[p] += sum % nearest neighbor interpolation
% linear interpolation
%f=floor(p)
%a=frac(p)
%y[f] += (1.0-a[0])*(1.0-a[1])*sum
%y[f+[0,1]] += (1.0-a[0])*a[1]*sum
%y[f+[1,0]] += a[0]*(1.0-a[1])*sum
%y[f+[1,1]] += a[0]*a[1]*sum
end
end
% Precompute the angles of the source to the detector array elements
angles2 = atan(fangeom.pixelpitch * (fangeom.detoffset - 0.5*fangeom.dimx + (0..fangeom.dimx-1))/fangeom.distsrcdet)
y = zeros(sz)
parallel_do(size(x),x,y,fangeom.angles,angles2,fangeom.voxelpitch, fangeom.distsrcdet, fangeom.distsrcobj, process)
end
% Function: forward_dd_fanbeam_projection
%
% Ray-based forward Distance-Driven projection: a reimplementation
% of the Distance-Driven algorithm of De Man, although with a simplification:
% (image domain -> sinogram domain)
% One first important detail is the projection normalization (because the sum of
% the weights of the rays that go through a point is not necessarily 1).
% Here, we perform the normalization directly in the image domain, prior to
% the forward projection. We can call this a PRE-normalization.
% Correspondingly, in the adjoint projector <adjoint_dd_fanbeam_projection>,
% the normalization is performed in image domain, AFTER the projection.
%
% A second detail is that the projection axis used to calculate the weights,
% is the detector array itself (some implementations use the horizontal or
% vertical axes), assuming circular pixels.
%
% There is another subtle detail, in some other distance driven implementation,
% the normalization weights are accumulated per projection angle, resulting
% in a cube of weights of size(im,0)*size(im,1)*numel(fangeom.angles) - which
% practically corresponds to a large amount of memory that needs to be pre-
% allocated. Here, we simply use the sum of these weights (sum(weights,2)).
% It is not completely clear to me how this choice affects the image quality.
% However, it is sure that the approach here is numerically more stable because
% of the larger weights (obtained after summing).
%
% The normalization is the source of possible numerical accuracy errors,
% especially when the normalization values are very small, and in case of
% ill-posed reconstruction (e.g. when either the number of detector samples is
% too small, or the number of projection angles). This effect is even
% strengthened by the fact that order of the parallel computations is not
% guaranteed in the adjoint projection. One possible improvement would be
% to incorporate Kahan's summing algorithm. It may also be useful to
% compare single precision accuracy vs. double precision accurary (the command
% line option -double in Quasar).
%
% :function y = forward_dd_fanbeam_projection(x, ni, fangeom)
%
% Parameters:
% x - input image (image domain)
% ni - normalization image: a matrix with pre-computed normalization values.
% fangeom - fanbeam geometry
function y = forward_dd_fanbeam_projection(x, ni, fangeom)
% x = image space, y = sinogram space, ni = normalization image
function [] = __kernel__ process(x : mat'unchecked, y : mat'unchecked, angles : vec'unchecked, _
voxelpitch : scalar, distsrcdet : scalar, distsrcobj : scalar, pixelpitch : scalar, detoffset : scalar, _
pos : ivec2, blkpos : ivec2, blkdim : ivec2)
theta = angles[pos[0]] % the projection angle
ray_dir = [cos(theta), sin(theta)] % direction of the ray
srcpos = float(size(x)) * 0.5 + distsrcobj * ray_dir / voxelpitch % position of the source
srcpos_orth = [-srcpos[1], srcpos[0]]
detarraydir = [-sin(theta), cos(theta)] * pixelpitch / voxelpitch % the direction of the detector array
% position of the detector array, corrected with the detector offset
detarraypos = float(size(x)) * 0.5 + (distsrcobj - distsrcdet) * ray_dir / voxelpitch + _
(detoffset - 0.5*size(y,0)) * detarraydir
halfpixelbound = [-sin(theta), cos(theta)]
% compute the thread index (assuming that there are 32 warps, and that each row is
% processed sequentially)
nthreads = 4
thread_idx = mod(blkpos[1],nthreads)
dims = size(y,0)
assert(dims<=1024 && nthreads<=4)
% allocate shared memory for this thread
y_val = shared(dims,nthreads)
% initialize
for i=0..size(y,0)-1
y_val[i, thread_idx] = 0
end
syncthreads
for m=0..size(x,0)-1
p = [m, pos[1]] % the position of the pixel we are considering
% project from srcpos through p onto the detector array
p1 = float(p) + 0.5
p2 = p1 - halfpixelbound % left boundary
p3 = p1 + halfpixelbound % right boundary
smp2 = srcpos_orth - [-p2[1], p2[0]] % orthogonal of (s-p)
smp3 = srcpos_orth - [-p3[1], p3[0]]
% compute the detector positions
lambda2 = dotprod(smp2, p2 - detarraypos) / dotprod(smp2, detarraydir)
lambda3 = dotprod(smp3, p3 - detarraypos) / dotprod(smp3, detarraydir)
det_x = min(int(lambda2),int(lambda3))
[min1, max1] = [min(lambda2,lambda3), max(lambda2,lambda3)]
while det_x >= 0 && det_x < size(y,0) && det_x < lambda3
[min2, max2] = [float(det_x), float(det_x+1)]
% compute the overlap with the detector element - we exploit the fact
% that we know that there IS an overlap, which gives a simple formula
overlap = min(max1, max2) - max(min1, min2)
% note that the normalization has already been done here...
y_val[det_x, thread_idx] += overlap * x[p]
det_x += 1
end
end
syncthreads
% now we perform a reduction to compute the final values
if blkdim[1] > 1
bit = 1
while bit < nthreads
if mod(thread_idx,bit*2) == 0 && blkpos[1] < nthreads
for i=0..size(y,0)-1
y_val[i, thread_idx] = y_val[i, thread_idx] + y_val[i,thread_idx + bit]
end
endif
syncthreads
bit *= 2
end
% write the results to the output - we again use some parallellization for this
nblocks = int((size(y,0) + blkdim[1] - 1) / blkdim[1])
for block=0..nblocks-1
i = block * blkdim[1] + blkpos[1]
if i < size(y,0)
y[i, pos[0]] = y_val[i, 0]
endif
end
else
for i=0..size(y,0)-1
y[i, pos[0]] = y_val[i, 0]
end
endif
end
y = zeros(fangeom.dimx,numel(fangeom.angles))
blksize = max_block_size(process,[1,size(x,1),1]) % note: block size is important to avoid bank conflicts
parallel_do([[numel(fangeom.angles),size(x,1),1],blksize],x./ni,y,fangeom.angles,fangeom.voxelpitch, fangeom.distsrcdet, _
fangeom.distsrcobj, fangeom.pixelpitch, fangeom.detoffset, process)
end
% Function: adjoint_dd_fanbeam_projection
%
% Adjoint Ray-based forward Distance-Driven projection
% (sinogram domain -> image domain)
%
% :function [y, ni] = adjoint_dd_fanbeam_projection(x, fangeom, sz)
%
% Parameters:
% x - input sinogram data
% ni - normalization image, which is computed by this function.
% sz - size of the output image
% fangeom - fanbeam geometry
%
% See also:
% <forward_dd_fanbeam_projection>
function [y, ni] = adjoint_dd_fanbeam_projection(x, fangeom, sz)
% x = sinogram space, y = image space, ni = normalization image
function [] = __kernel__ process(x : mat'unchecked, y : mat'unchecked, ni : mat'unchecked, angles : vec'unchecked, _
voxelpitch : scalar, distsrcdet : scalar, distsrcobj : scalar, pixelpitch : scalar, detoffset : scalar, _
pos : ivec2, blkpos : ivec2, blkdim : ivec2)
theta = angles[pos[0]] % the projection angle
ray_dir = [cos(theta), sin(theta)] % direction of the ray
srcpos = float(size(y)) * 0.5 + distsrcobj * ray_dir / voxelpitch % position of the source
srcpos_orth = [-srcpos[1], srcpos[0]]
detarraydir = [-sin(theta), cos(theta)] * pixelpitch / voxelpitch % the direction of the detector array
% position of the detector array, corrected with the detector offset
detarraypos = float(size(y)) * 0.5 + (distsrcobj - distsrcdet) * ray_dir / voxelpitch + _
(detoffset - 0.5*size(x,0)) * detarraydir
halfpixelbound = [-sin(theta), cos(theta)]
for m=0..size(y,0)-1
p = [m, pos[1]] % the position of the pixel we are considering
% project from srcpos through p onto the detector array
p1 = float(p) + 0.5
p2 = p1 - halfpixelbound % left boundary
p3 = p1 + halfpixelbound % right boundary
smp2 = srcpos_orth - [-p2[1], p2[0]] % orthogonal of (s-p)
smp3 = srcpos_orth - [-p3[1], p3[0]]
% compute the detector positions
lambda2 = dotprod(smp2, p2 - detarraypos) / dotprod(smp2, detarraydir)
lambda3 = dotprod(smp3, p3 - detarraypos) / dotprod(smp3, detarraydir)
det_x = min(int(lambda2),int(lambda3))
[min1, max1] = [min(lambda2,lambda3), max(lambda2,lambda3)]
while det_x >= 0 && det_x < size(x,0) && det_x < lambda3
[min2, max2] = [float(det_x), float(det_x+1)]
% compute the overlap with the detector element - we exploit the fact
% that we know that there IS an overlap, which gives a simple formula
overlap = min(max1, max2) - max(min1, min2)
% note that the normalization has already been done here...
y[p] += overlap * x[det_x, pos[0]]
ni[p] += overlap
det_x += 1
end
end
end
y = zeros(sz)
ni = zeros(sz)
% choose the block size so that we are free of bank conflicts
blksize = max_block_size(process,[1,sz[1],1])
parallel_do([[numel(fangeom.angles),sz[1],1],blksize],x,y,ni,fangeom.angles,fangeom.voxelpitch, fangeom.distsrcdet, _
fangeom.distsrcobj, fangeom.pixelpitch, fangeom.detoffset, process)
ni = ni .^ 4
% apply the normalization
y = y ./ ni
end
% Function: cg_fanbeam_reconstruction
%
% Reconstruction using conjugate gradients - naive forward and adjoint projections
% <forward_fanbeam_projection> and <adjoint_fanbeam_projection>
%
% Parameters:
% x - input sinogram data
% ni - normalization image, which is computed by this function.
% sz - size of the output image
% fangeom - fanbeam geometry
%
function x = cg_fanbeam_reconstruction(y, fangeom, sz)
W = v -> forward_fanbeam_projection(v, fangeom)
W_H = v -> adjoint_fanbeam_projection(v, fangeom, sz)
dotprod = (v, w) -> sum(sum(v .* w))
x = 0
r = W_H(y) % residual
p = r
new_err = dotprod(r, r)
tol = 1e-8
for it=1..20 % conjugate gradients
err=new_err
print "iteration ",it," err=",err
A_p = W_H(W(p))
alpha = err / dotprod(p, A_p)
x = x + alpha * p
r = r - alpha * A_p
new_err = dotprod(r, r)
if (new_err/numel(x) < tol)
break
endif
beta = new_err / err
p = r + beta * p
end
end
% Function: cg_dd_fanbeam_reconstruction
%
% Reconstruction using conjugate gradients - distance driven projections
% <forward_fanbeam_projection> and <adjoint_fanbeam_projection>
%
% :function x = cg_dd_fanbeam_reconstruction(y, fangeom, sz)
%
% Parameters:
% x - input sinogram data
% ni - normalization image, which is computed by this function.
% sz - size of the output image
% fangeom - fanbeam geometry
%
function x = cg_dd_fanbeam_reconstruction(y, fangeom, sz)
dotprod = (v, w) -> sum(sum(v .* w))
x = 0
[r,ni] = adjoint_dd_fanbeam_projection(y, fangeom, sz) % residual
p = r
new_err = dotprod(r, r)
tol = 1e-10
for it=1..20 % conjugate gradients
err=new_err
print "iteration ",it," err=",err
A_p = forward_dd_fanbeam_projection(p, ni, fangeom)
[A_p,ni] = adjoint_dd_fanbeam_projection(A_p, fangeom, sz)
alpha = err / dotprod(p, A_p)
x = x + alpha * p
r = r - alpha * A_p
new_err = dotprod(r, r)
if (new_err/numel(x) < tol)
break
endif
beta = new_err / err
p = r + beta * p
end
end
% Function: fan2para
%
% Regridding from fan-beam to parallel-beam. This is useful for comparing to
% the FBP reconstruction.
%
% :function y = fan2para(x, fangeom, parageom)
%
% Parameters:
% x - input fanbeam sinogram
% y - output parallelbeam sinogram
% fangeom - fanbeam geometry
% parageom - parallelbeam geometry
%
function y = fan2para(x, fangeom, parageom)
interp_nearest : [__device__ (mat'circular, vec2) -> scalar]
interp_nearest = __device__ (x : mat'circular, pos : vec2) -> x[pos]
interp_linear : [__device__ (mat'circular, vec2) -> scalar]
interp_linear = __device__ (x : mat'circular, pos : vec2) -> _
(f = frac(pos); q = int(pos); _
(1-f[0])*(1-f[1])*x[q] + _
f[0]*(1-f[1])*x[q+[1,0]] + _
(1-f[0])*f[1]*x[q+[0,1]] + _
f[0]*f[1]*x[q+[1,1]])
function [] = __kernel__ fan2para_process(x : mat'circular, y : mat'unchecked, _
para_angles : vec, para_offsets : vec, _
fan_angles : vec, fan_angles2 : vec, distsrcobj : scalar, _
interp : [__device__ (mat, vec2) -> scalar], pos : vec2)
d = para_offsets[pos[0]] % offset along the detector array
theta = para_angles[pos[1]] % angle
alpha_prime = asin(d / distsrcobj)
theta_prime = mod(2.0*pi + theta + alpha_prime, 2.0*pi)
% compute indices
n = numel(fan_angles2)
alpha_idx = (alpha_prime-fan_angles2[0])*float(n-1)/(fan_angles2[n-1]-fan_angles2[0])
% approximation - note: since the angles are not necessarily uniformly spaced
% we may need to do a binary search here...
n = numel(fan_angles)
theta_idx = (theta_prime-fan_angles[0])*float(n-1)/(fan_angles[n-1]-fan_angles[0])
% linear interpolation
y[pos] = interp(x, [alpha_idx, theta_idx])
end
angles2 = atan(fangeom.pixelpitch * (fangeom.detoffset - 0.5*fangeom.dimx + (0..fangeom.dimx-1))/fangeom.distsrcdet)
y = zeros(numel(parageom.offsets),numel(parageom.angles))
parallel_do(size(y), x, y, parageom.angles, parageom.offsets, fangeom.angles, angles2, _
fangeom.distsrcobj/fangeom.voxelpitch, interp_linear, fan2para_process)
end
function [] = main()
% fan-beam scanner geometry
fangeom = object()
fangeom.distsrcdet = 291.20 % distance source-detector
fangeom.distsrcobj = 115.84 % distance source-object
fangeom.voxelpitch = 0.085
fangeom.detoffset = -1.5
fangeom.dimx = 560 % number of detector elements
fangeom.pixelpitch = 0.20 % distance between the detector elements
fangeom.angles = linspace(0,2*pi,512)
normalize = x -> (x / max(max(x))) * 255
im = imread("SheppLogan.png")
im = im[:,:,0]
psd = x -> log(1e-4+fftshift2(abs(fft2(x))))
% Compute the sinogram data
y = forward_dd_fanbeam_projection(im, ones(size(im)), fangeom)
print "Naive algorithm"
tic()
z1 = cg_fanbeam_reconstruction(y, fangeom, size(im))
toc()
print "Distance driven algorithm"
tic()
z2 = cg_dd_fanbeam_reconstruction(y, fangeom, size(im))
toc()
print "FBP algorithm"
parageom = object()
parageom.angles = linspace(0,2*pi,256)
parageom.offsets = -255..256
y_parallel = fan2para(y, fangeom, parageom)
z3 = fbp_parabeam_reconstruction(y_parallel, parageom, size(im), "hann")
imshow(y, []); title("Input sinogram")
imshow(z1, []); title("Reconstructed using the naive algorithm")
imshow(z2, []); title("Reconstructed using the distance driven algorithm")
imshow(z3, []); title("Reconstructed using the FBP algorithm")
end