Version in base suite: 1.10.0+dfsg.1-1 Base version: plastimatch_1.10.0+dfsg.1-1 Target version: plastimatch_1.10.0+dfsg.2-1~deb13u1 Base file: /srv/ftp-master.debian.org/ftp/pool/main/p/plastimatch/plastimatch_1.10.0+dfsg.1-1.dsc Target file: /srv/ftp-master.debian.org/policy/pool/main/p/plastimatch/plastimatch_1.10.0+dfsg.2-1~deb13u1.dsc .gitignore | 5 .gitlab-ci.yml | 18 - debian/changelog | 34 ++ debian/copyright | 13 debian/salsa-ci.yml | 3 debian/watch | 2 src/register/cuda/viscous_compute.cu | 459 --------------------------- src/register/cuda/viscous_convolution.cu | 349 -------------------- src/register/cuda/viscous_finalize.cu | 128 ------- src/register/cuda/viscous_funcHistogram.cu | 206 ------------ src/register/cuda/viscous_funcImageDomain.cu | 272 ---------------- src/register/cuda/viscous_initialize.cu | 267 --------------- src/register/cuda/viscous_main.cu | 296 ----------------- src/register/viscous_convolution.h | 125 ------- src/register/viscous_global.h | 211 ------------ 15 files changed, 47 insertions(+), 2341 deletions(-) dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp0w9lujq5/plastimatch_1.10.0+dfsg.1-1.dsc: no acceptable signature found dpkg-source: warning: cannot verify inline signature for /srv/release.debian.org/tmp/tmp0w9lujq5/plastimatch_1.10.0+dfsg.2-1~deb13u1.dsc: no acceptable signature found diff -Nru plastimatch-1.10.0+dfsg.1/.gitignore plastimatch-1.10.0+dfsg.2/.gitignore --- plastimatch-1.10.0+dfsg.1/.gitignore 2024-09-16 23:22:25.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/.gitignore 1970-01-01 00:00:00.000000000 +0000 @@ -1,5 +0,0 @@ -*~ -\#*\# -GPATH -GRTAGS -GTAGS diff -Nru plastimatch-1.10.0+dfsg.1/.gitlab-ci.yml plastimatch-1.10.0+dfsg.2/.gitlab-ci.yml --- plastimatch-1.10.0+dfsg.1/.gitlab-ci.yml 2024-09-16 23:22:25.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/.gitlab-ci.yml 1970-01-01 00:00:00.000000000 +0000 @@ -1,18 +0,0 @@ -# This file is a template, and might need editing before it works on your project. -# Full project: https://gitlab.com/pages/doxygen -image: python:alpine - -pages: - script: - - apk update && apk add doxygen ghostscript texlive texlive-dvi texmf-dist-latexextra - - doxygen src/Doxyfile - - mkdir -p public - - mv html/ public/doxygen - - ls public/doxygen - - pip install -U sphinx - - sphinx-build -b html doc/sphinx public - artifacts: - paths: - - public - only: - - master diff -Nru plastimatch-1.10.0+dfsg.1/debian/changelog plastimatch-1.10.0+dfsg.2/debian/changelog --- plastimatch-1.10.0+dfsg.1/debian/changelog 2025-02-13 04:48:49.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/debian/changelog 2026-02-22 09:49:57.000000000 +0000 @@ -1,3 +1,37 @@ +plastimatch (1.10.0+dfsg.2-1~deb13u1) trixie; urgency=medium + + * Team upload. + * Backport removal of non-free files to trixie. + (Changes in the repack tooling also resulted in the removal of + upstream source files .gitlab-ci.yml and .gitignore.) + * Revert unstable changes irrelevant for trixie: + - "d/control: declare compliance to standards version 4.7.3. + - "d/control: drop redundant Rules-Requires-Root: no. + - "d/control: drop redundant Priority: optional. + - "d/t/control: drop deprecated skip-not-installable restriction. + - "cmake4.patch: fix build failure with cmake 4. + * d/watch: rollback to watch file version 4. + This change preserves the bump to +dfsg.2. + + -- Étienne Mollier Sun, 22 Feb 2026 10:49:57 +0100 + +plastimatch (1.10.0+dfsg.2-1) unstable; urgency=medium + + * Team upload. + * d/copyright: exclude files preventing commercial uses. + * d/watch: bump to +dfsg.2 repack suffix. + This change also converts the watch file to v5 uscan Gitlab template. + * New upstream repack 1.10.0+dfsg.2 (Closes: #1124959) + * cmake4.patch: fix build failure with cmake 4. (Closes: #1125557) + * d/copyright: remove superfluous file pattern. + * d/t/control: drop deprecated skip-not-installable restriction. + * d/control: drop redundant Priority: optional. + * d/control: drop redundant Rules-Requires-Root: no. + * d/control: declare compliance to standards version 4.7.3. + * d/salsa-ci.yml: disable i386 builds. + + -- Étienne Mollier Sat, 21 Feb 2026 21:52:09 +0100 + plastimatch (1.10.0+dfsg.1-1) unstable; urgency=medium * [8e67811] New upstream version 1.10.0+dfsg.1 diff -Nru plastimatch-1.10.0+dfsg.1/debian/copyright plastimatch-1.10.0+dfsg.2/debian/copyright --- plastimatch-1.10.0+dfsg.1/debian/copyright 2025-02-13 04:48:49.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/debian/copyright 2026-02-22 09:49:57.000000000 +0000 @@ -19,6 +19,15 @@ libs/lua-5.1.4 libs/msinttypes libs/sqlite-3.6.21 + src/register/cuda/viscous_compute.cu + src/register/cuda/viscous_convolution.cu + src/register/cuda/viscous_finalize.cu + src/register/cuda/viscous_funcHistogram.cu + src/register/cuda/viscous_funcImageDomain.cu + src/register/cuda/viscous_initialize.cu + src/register/cuda/viscous_main.cu + src/register/viscous_convolution.h + src/register/viscous_global.h Files: * Copyright: (c) 2004-2015 Massachusetts General Hospital @@ -151,10 +160,6 @@ Copyright: 1998 Nicholas Devillard License: public-domain -Files: libs/itk-3.20.0/* -Copyright: 1999-2003 Insight Software Consortium -License: BSD-3-Clause - Files: libs/liblbfgs-1.9/* Copyright: 1990 Jorge Nocedal, 2007-2010 Naoaki Okazaki License: MIT diff -Nru plastimatch-1.10.0+dfsg.1/debian/salsa-ci.yml plastimatch-1.10.0+dfsg.2/debian/salsa-ci.yml --- plastimatch-1.10.0+dfsg.1/debian/salsa-ci.yml 2025-02-13 04:48:49.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/debian/salsa-ci.yml 2026-02-22 09:49:57.000000000 +0000 @@ -2,3 +2,6 @@ include: - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml + +variables: + SALSA_CI_DISABLE_BUILD_PACKAGE_I386: "true" diff -Nru plastimatch-1.10.0+dfsg.1/debian/watch plastimatch-1.10.0+dfsg.2/debian/watch --- plastimatch-1.10.0+dfsg.1/debian/watch 2025-02-13 04:48:49.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/debian/watch 2026-02-22 09:49:57.000000000 +0000 @@ -1,4 +1,4 @@ version=4 -opts="repacksuffix=+dfsg.1,searchmode=plain,\ +opts="repacksuffix=+dfsg.2,searchmode=plain,\ dversionmangle=s/\+(debian|dfsg|ds|deb).*(\d+)?$//" \ https://gitlab.com/plastimatch/plastimatch/tags?sort=updated_desc -/archive/v?\d[\d.]+/@PACKAGE@-@ANY_VERSION@@ARCHIVE_EXT@ diff -Nru plastimatch-1.10.0+dfsg.1/src/register/cuda/viscous_compute.cu plastimatch-1.10.0+dfsg.2/src/register/cuda/viscous_compute.cu --- plastimatch-1.10.0+dfsg.1/src/register/cuda/viscous_compute.cu 2024-09-16 23:22:25.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/src/register/cuda/viscous_compute.cu 1970-01-01 00:00:00.000000000 +0000 @@ -1,459 +0,0 @@ -/******************************************************************* -c* Multimodal Deformable Image Registration * -c* via Mutual Information or Bhattacharyya Distantce * -c* Version: 1.0 * -c* Language: C, CUDA * -c* * -c* Developer: Yifei Lou * -c* Email: yifei.lou@ece.gatech.edu * -c* * -c* School of Electrical and Computer Engineering * -c* Georgia Institute of Technology * -c* Atlanta, GA, 30318 * -c* Website: http://groups.bme.gatech.edu/groups/bil/ * -c* * -c* Copyright (c) 2011 * -c* All rights reserved. * -c* * -c* Permission to use, copy, or modify this code and its * -c* documentation for scientific purpose is hereby granted * -c* without fee, provided that this copyright notice appear in * -c* all copies and that both that copyright notice and this * -c* permission notice appear in supporting documentation. The use * -c* for commercial purposes is prohibited without permission. * -c* * -c* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * -c* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * -c* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * -c* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * -c* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * -c* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * -c* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT * -c* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF* -c* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * -c* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * -c* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * -c* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * -c* THE POSSIBILITY OF SUCH DAMAGE. * -c* * -c******************************************************************/ - -/******************************************************************* -c* Short discription * -c* main function to register two images on the current scale * -c* including upsample and downsample * -c******************************************************************/ - -#include -#include -#include -#include -#include -#include -#include -#include "viscous_convolution.h" -#include "viscous_global.h" - -// hash a point in the unit square to the index of -// the grid bucket that contains it -struct point_to_bucket_index : public thrust::unary_function -{ - __host__ __device__ - point_to_bucket_index(unsigned int width, unsigned int height) - :w(width),h(height){} - - __host__ __device__ - unsigned int operator()(float2 p) const - { -// find the raster indices of p's bucket - unsigned int x = static_cast(p.x * (w-1)); - unsigned int y = static_cast(p.y * (h-1)); - -// return the bucket's linear index - return y * w + x; - } - - unsigned int w, h; -}; - -__global__ void downSample(float *src, float *dest, int NX, int NY, int NZ, int s) -{ - const int tid = (blockIdx.y*NBLOCKX + blockIdx.x)*blockDim.x + threadIdx.x; - - if(tid < NX*NY*NZ) - { - int z = tid/(NX*NY); - int y = (tid%(NX*NY))/NX; - int x = tid%NX; - - float sum =0.0f; - for(int xs = 0; xs(); - cutilSafeCall( cudaMalloc3DArray(&d_im_move_array, &channelDesc, volumeSize) ); - cudaMemcpy3DParms copyParams = {0}; - copyParams.srcPtr = make_cudaPitchedPtr((void*)d_im_move, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height); - copyParams.dstArray = d_im_move_array; - copyParams.extent = volumeSize; - copyParams.kind = cudaMemcpyDeviceToDevice; - cutilSafeCall( cudaMemcpy3D(©Params) ); - - - d_im_move_tex.normalized = false; - d_im_move_tex.filterMode = cudaFilterModeLinear; - - - cutilSafeCall(cudaBindTextureToArray(d_im_move_tex, d_im_move_array, channelDesc)); - - -// bind vector flows to texture - cutilSafeCall( cudaMalloc3DArray(&d_mv_x_array, &channelDesc, volumeSize) ); - cudaMemcpy3DParms copyParams_x = {0}; - copyParams_x.srcPtr = make_cudaPitchedPtr((void*)d_mv_x, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height); - copyParams_x.dstArray = d_mv_x_array; - copyParams_x.extent = volumeSize; - copyParams_x.kind = cudaMemcpyDeviceToDevice; - cutilSafeCall( cudaMemcpy3D(©Params_x) ); - d_mv_x_tex.normalized = false; - d_mv_x_tex.filterMode = cudaFilterModeLinear; - - - cutilSafeCall( cudaMalloc3DArray(&d_mv_y_array, &channelDesc, volumeSize) ); - cudaMemcpy3DParms copyParams_y = {0}; - copyParams_y.srcPtr = make_cudaPitchedPtr((void*)d_mv_y, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height); - copyParams_y.dstArray = d_mv_y_array; - copyParams_y.extent = volumeSize; - copyParams_y.kind = cudaMemcpyDeviceToDevice; - cutilSafeCall( cudaMemcpy3D(©Params_y) ); - d_mv_y_tex.normalized = false; - d_mv_y_tex.filterMode = cudaFilterModeLinear; - - cutilSafeCall( cudaMalloc3DArray(&d_mv_z_array, &channelDesc, volumeSize) ); - cudaMemcpy3DParms copyParams_z = {0}; - copyParams_z.srcPtr = make_cudaPitchedPtr((void*)d_mv_z, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height); - copyParams_z.dstArray = d_mv_z_array; - copyParams_z.extent = volumeSize; - copyParams_z.kind = cudaMemcpyDeviceToDevice; - cutilSafeCall( cudaMemcpy3D(©Params_z) ); - d_mv_z_tex.normalized = false; - d_mv_z_tex.filterMode = cudaFilterModeLinear; - - float *d_im_out; - cutilSafeCall( cudaMalloc((void **)&d_im_out, sDATA_SIZE) ); - - - -// velocity - float *d_v_x, *d_v_x_copy; - float *d_v_y, *d_v_y_copy; - float *d_v_z, *d_v_z_copy; - cutilSafeCall( cudaMalloc((void **)&d_v_x, sDATA_SIZE) ); - cutilSafeCall( cudaMalloc((void **)&d_v_y, sDATA_SIZE) ); - cutilSafeCall( cudaMalloc((void **)&d_v_z, sDATA_SIZE) ); - cutilSafeCall( cudaMalloc((void **)&d_v_x_copy, sDATA_SIZE) ); - cutilSafeCall( cudaMalloc((void **)&d_v_y_copy, sDATA_SIZE) ); - cutilSafeCall( cudaMalloc((void **)&d_v_z_copy, sDATA_SIZE) ); - - -// setup for computing joint histogram via thrust -// the grid data structure keeps a range per grid bucket: -// each bucket_begin[i] indexes the first element of bucket i's list of points -// each bucket_end[i] indexes one past the last element of bucket i's list of points - thrust::device_vector bucket_begin(nBin*nBin); - thrust::device_vector bucket_end(nBin*nBin); - -// allocate storage for each point's bucket index - thrust::device_vector bucket_indices(NX*NY*NZ); - -// allocate space to hold per-bucket sizes - thrust::device_vector bucket_sizes(nBin*nBin); - -// allocate float2 vector - float2 *d_points; - cudaMalloc((void**) &d_points, sizeof(float2)*NX*NY*NZ); - - - - int regrid = 0; - float MI[1000]; - - int3 Dims; - Dims.x = NX; - Dims.y = NY; - Dims.z = NZ; - - - - for(int it=0; it>>(d_mv_x, d_mv_y, d_mv_z, d_im_out, NX, NY, NZ); - - -// joint histogram via thrust ----- begin -// convert to float2 vector - transToFloat2<<>>(d_im_out, d_im_static, d_points, NX*NY*NZ); - -// use a thrust ptr to wrap the raw pointer - thrust::device_ptr points_t(d_points); - -// transform the points to their bucket indices - thrust::transform(points_t, points_t+NX*NY*NZ, bucket_indices.begin(), point_to_bucket_index(nBin,nBin)); - -// sort the bucket index - thrust::sort(bucket_indices.begin(), bucket_indices.end()); - -// find the beginning of each bucket's list of points - thrust::counting_iterator search_begin(0); - thrust::lower_bound(bucket_indices.begin(), bucket_indices.end(), search_begin, - search_begin + nBin*nBin, bucket_begin.begin()); - -// find the end of each bucket's list of points - thrust::upper_bound(bucket_indices.begin(), bucket_indices.end(), search_begin, - search_begin + nBin*nBin, bucket_end.begin()); - -// take the difference between bounds to find each bucket size - thrust::transform(bucket_end.begin(), bucket_end.end(), bucket_begin.begin(), - bucket_sizes.begin(), thrust :: minus()); - -// now hist contains the histogram - unsigned int *hist = thrust::raw_pointer_cast(&bucket_sizes[0]); - - copyHist<<>>(hist, d_jointHistogram); -// joint histogram via thrust ----- end - - - -// compute the convolution of joint histogram - myconv2dGPU<<>>(d_jointHistogram, d_jointHistogram_conv, GaussKernelH, nBin, nBin, 3*hValue); - - -// normalize joint histogram - float sum = cublasSasum (nBin*nBin, d_jointHistogram_conv , 1); - cublasSscal (nBin*nBin, 1.0f/sum, d_jointHistogram_conv, 1); - - -// compute mutual info by GPU - marginalDist<<>>(d_jointHistogram_conv, d_probx, d_proby); - - switch (METHOD) - { - case 1: - marginalBnorm_sum<<>>(d_jointHistogram_conv, d_probx, d_proby, d_jointHistogram); - marginalDistAlongY<<>>(d_jointHistogram, d_Bsum); - BnormGPU<<>>(d_jointHistogram_conv, d_probx, d_proby,d_Bsum, d_jointHistogram); - break; - case 2: - mutualInfoGPU<<>>(d_jointHistogram_conv, d_probx, d_proby, d_jointHistogram); - break; - } - MI[it] = cublasSasum (nBin*nBin, d_jointHistogram_conv, 1); - printf("mutual information (%d)= %f\n", it, MI[it]); - - -// NOTE: after this step, jointHistogram becomes the likelihood -// compute the first derivative w.r.t. x-dim of joint histogram - myconv2dGPU<<>>(d_jointHistogram, d_jointHistogram_conv, GaussKernelHx, nBin, nBin,3*hValue); - -// compute the force - forceComp<<>>(d_im_out, d_im_static, d_jointHistogram_conv, d_v_x, d_v_y, d_v_z, NX, NY, NZ); - - ImageSmooth(d_v_x, d_v_x_copy,Dims); - ImageSmooth(d_v_y, d_v_y_copy,Dims); - ImageSmooth(d_v_z, d_v_z_copy,Dims); - - flowComp<<>>(d_mv_x, d_mv_y, d_mv_z, d_v_x_copy, d_v_y_copy, d_v_z_copy, d_v_x, d_v_y, NX, NY, NZ); -// NOTE: d_v_x is Jacobian, d_v_y is the max flow -// d_v_x_copy, d_v_y_copy, d_v_z_copy are the displacement - - thrust :: device_ptr data_ptr(d_v_y); - int maxInd = cublasIsamax(NX*NY*NZ, d_v_y, 1) -1; - float maxflow = data_ptr[maxInd]; - float dt = (du/maxflow); // > 1) ? 1 : du/maxflow; - printf("dt = %f \n", dt); - - flowUpdate<<>>(d_mv_x, d_mv_y, d_mv_z, d_v_x_copy, d_v_y_copy, d_v_z_copy,dt, NX, NY, NZ); - -// regridding if Jacobian < threshJaco - sum = cublasSasum(NX*NY*NZ, d_v_x, 1); - if (sum>0.5) - { - regrid ++; - - printf("regrid = %d\n", regrid); -// save d_im_move to be d_im_out - - cudaUnbindTexture(d_im_move_tex); - cudaMemcpy3DParms copyParams = {0}; - copyParams.srcPtr = make_cudaPitchedPtr((void*)d_im_out, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height); - copyParams.dstArray = d_im_move_array; - copyParams.extent = volumeSize; - copyParams.kind = cudaMemcpyDeviceToDevice; - cutilSafeCall( cudaMemcpy3D(©Params) ); - cutilSafeCall(cudaBindTextureToArray(d_im_move_tex, d_im_move_array)); - - -// update vector flow - ImageWarp_mv<<>>(d_mv_x, d_mv_y, d_mv_z, NX, NY, NZ); - - cudaMemcpy3DParms copyParams_x = {0}; - copyParams_x.srcPtr = make_cudaPitchedPtr((void*)d_mv_x, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height); - copyParams_x.dstArray = d_mv_x_array; - copyParams_x.extent = volumeSize; - copyParams_x.kind = cudaMemcpyDeviceToDevice; - cutilSafeCall( cudaMemcpy3D(©Params_x) ); - cutilSafeCall(cudaBindTextureToArray(d_mv_x_tex, d_mv_x_array)); - - cudaMemcpy3DParms copyParams_y = {0}; - copyParams_y.srcPtr = make_cudaPitchedPtr((void*)d_mv_y, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height); - copyParams_y.dstArray = d_mv_y_array; - copyParams_y.extent = volumeSize; - copyParams_y.kind = cudaMemcpyDeviceToDevice; - cutilSafeCall( cudaMemcpy3D(©Params_y) ); - cutilSafeCall(cudaBindTextureToArray(d_mv_y_tex, d_mv_y_array)); - - cudaMemcpy3DParms copyParams_z = {0}; - copyParams_z.srcPtr = make_cudaPitchedPtr((void*)d_mv_z, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height); - copyParams_z.dstArray = d_mv_z_array; - copyParams_z.extent = volumeSize; - copyParams_z.kind = cudaMemcpyDeviceToDevice; - cutilSafeCall( cudaMemcpy3D(©Params_z) ); - cutilSafeCall(cudaBindTextureToArray(d_mv_z_tex, d_mv_z_array)); - - - cutilSafeCall( cudaMemset(d_mv_x, 0, sDATA_SIZE) ); - cutilSafeCall( cudaMemset(d_mv_y, 0, sDATA_SIZE) ); - cutilSafeCall( cudaMemset(d_mv_z, 0, sDATA_SIZE) ); - - - - - } // end for regridding - - - - - } // for-loop iteration - - - if (!regrid) - { - ImageWarp<<>>(d_mv_x, d_mv_y, d_mv_z, d_im_move, NX, NY, NZ); - } - else - { - cudaMemcpy3DParms copyParams = {0}; - cudaUnbindTexture(d_im_move_tex); - copyParams.srcPtr = make_cudaPitchedPtr((void*)d_im_move, volumeSize.width*sizeof(float), volumeSize.width, volumeSize.height); - copyParams.dstArray = d_im_move_array; - copyParams.extent = volumeSize; - copyParams.kind = cudaMemcpyDeviceToDevice; - cutilSafeCall( cudaMemcpy3D(©Params) ); - cutilSafeCall(cudaBindTextureToArray(d_im_move_tex, d_im_move_array)); - - ImageWarp_final<<>>(d_mv_x, d_mv_y, d_mv_z,d_im_move, NX, NY, NZ); - - - } - - - - cudaFree(d_points); - - - - cudaFree(d_v_x); - cudaFree(d_v_y); - cudaFree(d_v_z); - cudaFree(d_v_x_copy); - cudaFree(d_v_y_copy); - cudaFree(d_v_z_copy); - - - cudaUnbindTexture(d_im_move_tex); - cudaFreeArray(d_im_move_array); - - cudaUnbindTexture(d_mv_x_tex); - cudaFreeArray(d_mv_x_array); - - cudaUnbindTexture(d_mv_y_tex); - cudaFreeArray(d_mv_y_array); - - cudaUnbindTexture(d_mv_z_tex); - cudaFreeArray(d_mv_z_array); - - cudaFree(d_im_out); - - -} - - -__global__ void transToFloat2(const float *input1, const float *input2, float2 *output, const int n) -{ - const int tid = (blockIdx.y*NBLOCKX + blockIdx.x)*blockDim.x + threadIdx.x; -// obtain current id on thread - - if (tid < n) - { - output[tid] = make_float2(input1[tid], input2[tid]); - } - -} diff -Nru plastimatch-1.10.0+dfsg.1/src/register/cuda/viscous_convolution.cu plastimatch-1.10.0+dfsg.2/src/register/cuda/viscous_convolution.cu --- plastimatch-1.10.0+dfsg.1/src/register/cuda/viscous_convolution.cu 2024-09-16 23:22:25.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/src/register/cuda/viscous_convolution.cu 1970-01-01 00:00:00.000000000 +0000 @@ -1,349 +0,0 @@ -/******************************************************************* -c* Multimodal Deformable Image Registration * -c* via Mutual Information or Bhattacharyya Distantce * -c* Version: 1.0 * -c* Language: C, CUDA * -c* * -c* Developer: Yifei Lou * -c* Email: yifei.lou@ece.gatech.edu * -c* * -c* School of Electrical and Computer Engineering * -c* Georgia Institute of Technology * -c* Atlanta, GA, 30318 * -c* Website: http://groups.bme.gatech.edu/groups/bil/ * -c* * -c* Copyright (c) 2011 * -c* All rights reserved. * -c* * -c* Permission to use, copy, or modify this code and its * -c* documentation for scientific purpose is hereby granted * -c* without fee, provided that this copyright notice appear in * -c* all copies and that both that copyright notice and this * -c* permission notice appear in supporting documentation. The use * -c* for commercial purposes is prohibited without permission. * -c* * -c* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * -c* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * -c* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * -c* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * -c* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * -c* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * -c* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT * -c* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF* -c* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * -c* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * -c* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * -c* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * -c* THE POSSIBILITY OF SUCH DAMAGE. * -c* * -c******************************************************************/ - -/******************************************************************* -c* Short discription * -c* convolution related support functions * -c* in SDK as well as developped by Xuejun Gu and Yifei Lou * -c******************************************************************/ - - -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ -#include -#include "viscous_global.h" -#include "viscous_convolution.h" - -//////////////////////////////////////////////////////////////////////////////// -// Row convolution filter by Frame -//////////////////////////////////////////////////////////////////////////////// -__global__ void convolutionRowGPU_byframe( - float *d_Result, - float *d_Data, - int dataW, - int dataH, - int nF -){ - //Data cache - __shared__ float data[KERNEL_RADIUS + ROW_TILE_W + KERNEL_RADIUS]; - - //Current tile and apron limits, relative to row start - const int tileStart = IMUL(blockIdx.x, ROW_TILE_W); - const int tileEnd = tileStart + ROW_TILE_W - 1; - const int apronStart = tileStart - KERNEL_RADIUS; - const int apronEnd = tileEnd + KERNEL_RADIUS; - - //Clamp tile and apron limits by image borders - const int tileEndClamped = min(tileEnd, dataW - 1); - const int apronStartClamped = max(apronStart, 0); - const int apronEndClamped = min(apronEnd, dataW - 1); - - //Row start index in d_Data[] - const int rowStart = nF*dataW*dataH+IMUL(blockIdx.y, dataW); - - //Aligned apron start. Assuming dataW and ROW_TILE_W are multiples - //of half-warp size, rowStart + apronStartAligned is also a - //multiple of half-warp size, thus having proper alignment - //for coalesced d_Data[] read. - const int apronStartAligned = tileStart - KERNEL_RADIUS_ALIGNED; - - const int loadPos = apronStartAligned + threadIdx.x; - //Set the entire data cache contents - //Load global memory values, if indices are within the image borders, - //or initialize with zeroes otherwise - if(loadPos >= apronStart){ - const int smemPos = loadPos - apronStart; - - data[smemPos] = - ((loadPos >= apronStartClamped) && (loadPos <= apronEndClamped)) ? - d_Data[rowStart + loadPos] : 0; - } - - - //Ensure the completness of the loading stage - //because results, emitted by each thread depend on the data, - //loaded by another threads - __syncthreads(); - const int writePos = tileStart + threadIdx.x; - //Assuming dataW and ROW_TILE_W are multiples of half-warp size, - //rowStart + tileStart is also a multiple of half-warp size, - //thus having proper alignment for coalesced d_Result[] write. - if(writePos <= tileEndClamped){ - const int smemPos = writePos - apronStart; - float sum = 0; - sum = convolutionRow<2 * KERNEL_RADIUS>(data + smemPos); - - d_Result[rowStart + writePos] = sum; - } -} - - -//////////////////////////////////////////////////////////////////////////////// -// Column convolution filter -//////////////////////////////////////////////////////////////////////////////// -__global__ void convolutionColumnGPU( - float *d_Result, - float *d_Data, - int dataW, - int dataH, - int smemStride, - int gmemStride, - int nF -){ - //Data cache - __shared__ float data[COLUMN_TILE_W * (KERNEL_RADIUS + COLUMN_TILE_H + KERNEL_RADIUS)]; - - //Current tile and apron limits, in rows - const int tileStart = IMUL(blockIdx.y, COLUMN_TILE_H); - const int tileEnd = tileStart + COLUMN_TILE_H - 1; - const int apronStart = tileStart - KERNEL_RADIUS; - const int apronEnd = tileEnd + KERNEL_RADIUS; - - //Clamp tile and apron limits by image borders - const int tileEndClamped = min(tileEnd, dataH - 1); - const int apronStartClamped = max(apronStart, 0); - const int apronEndClamped = min(apronEnd, dataH - 1); - - //Current column index - const int columnStart = IMUL(blockIdx.x, COLUMN_TILE_W) + threadIdx.x; - - //Shared and global memory indices for current column - - int smemPos = IMUL(threadIdx.y, COLUMN_TILE_W) + threadIdx.x; - //int gmemPos = IMUL(apronStart + threadIdx.y, dataW) + columnStart; - int gmemPos = IMUL(apronStart + threadIdx.y, dataW) + columnStart + nF *dataH *dataW; // added by xuejun - - //Cycle through the entire data cache - //Load global memory values, if indices are within the image borders, - //or initialize with zero otherwise - for(int y = apronStart + threadIdx.y; y <= apronEnd; y += blockDim.y){ - data[smemPos] = - ((y >= apronStartClamped) && (y <= apronEndClamped)) ? - d_Data[gmemPos] : 0; - smemPos += smemStride; - gmemPos += gmemStride; - } - - //Ensure the completness of the loading stage - //because results, emitted by each thread depend on the data, - //loaded by another threads - __syncthreads(); - - //Shared and global memory indices for current column - smemPos = IMUL(threadIdx.y + KERNEL_RADIUS, COLUMN_TILE_W) + threadIdx.x; - //gmemPos = IMUL(tileStart + threadIdx.y , dataW) + columnStart; - gmemPos = IMUL(tileStart + threadIdx.y , dataW) + columnStart + nF *dataH *dataW; // added by xuejun - - //Cycle through the tile body, clamped by image borders - //Calculate and output the results - for(int y = tileStart + threadIdx.y; y <= tileEndClamped; y += blockDim.y){ - float sum = 0; - - sum = convolutionColumn<2 * KERNEL_RADIUS>(data + smemPos); - - d_Result[gmemPos] = sum; - smemPos += smemStride; - gmemPos += gmemStride; - } -} -//////////////////////////////////////////////////////////////////////////////// -// Frame convolution filter -//////////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////////// -// Frame convolution filter -//////////////////////////////////////////////////////////////////////////////// -__global__ void convolutionFrameGPU( - float *d_Result, - float *d_Data, - int dataW, - int dataH, - int smemStride, - int gmemStride -){ - //Data cache - __shared__ float data[COLUMN_TILE_W * (KERNEL_RADIUS + COLUMN_TILE_H + KERNEL_RADIUS)]; - - //Current tile and apron limits, in rows - const int tileStart = IMUL(blockIdx.y, COLUMN_TILE_H); - const int tileEnd = tileStart + COLUMN_TILE_H - 1; - const int apronStart = tileStart - KERNEL_RADIUS; - const int apronEnd = tileEnd + KERNEL_RADIUS; - - //Clamp tile and apron limits by image borders - const int tileEndClamped = min(tileEnd, dataH - 1); - const int apronStartClamped = max(apronStart, 0); - const int apronEndClamped = min(apronEnd, dataH - 1); - - //Current column index - const int columnStart = IMUL(blockIdx.x, COLUMN_TILE_W) + threadIdx.x; - - //Shared and global memory indices for current column - int smemPos = IMUL(threadIdx.y, COLUMN_TILE_W) + threadIdx.x; - int gmemPos = IMUL(apronStart + threadIdx.y, dataW) + columnStart; - //Cycle through the entire data cache - //Load global memory values, if indices are within the image borders, - //or initialize with zero otherwise - for(int y = apronStart + threadIdx.y; y <= apronEnd; y += blockDim.y){ - data[smemPos] = - ((y >= apronStartClamped) && (y <= apronEndClamped)) ? - d_Data[gmemPos] : 0; - smemPos += smemStride; - gmemPos += gmemStride; - } - - //Ensure the completness of the loading stage - //because results, emitted by each thread depend on the data, - //loaded by another threads - __syncthreads(); - //Shared and global memory indices for current column - smemPos = IMUL(threadIdx.y + KERNEL_RADIUS, COLUMN_TILE_W) + threadIdx.x; - gmemPos = IMUL(tileStart + threadIdx.y , dataW) + columnStart; - //Cycle through the tile body, clamped by image borders - //Calculate and output the results - for(int y = tileStart + threadIdx.y; y <= tileEndClamped; y += blockDim.y){ - float sum = 0; - - sum = convolutionColumn<2 * KERNEL_RADIUS>(data + smemPos); - - d_Result[gmemPos] = sum; - smemPos += smemStride; - gmemPos += gmemStride; - } -} - - - /**************************************************************************/ - /********** Doing low pass filter on an image function ****************/ - /**************************************************************************/ - - -void ImageSmooth(float *d_image, float *d_image_conv, int3 Dims) -{ - - - //Dims: size of image - - int DATA_W = Dims.x, DATA_H = Dims.y, DATA_F = Dims.z; - float *d_temp; - int SDATA_SIZE = DATA_W*DATA_H*DATA_F*sizeof(float); - - cudaMalloc((void**)&d_temp, SDATA_SIZE); - - // row convolution: - dim3 blockGridRows(iDivUp(DATA_W, ROW_TILE_W), DATA_H, 1); - dim3 threadBlockRows(KERNEL_RADIUS_ALIGNED + ROW_TILE_W + KERNEL_RADIUS, 1); - - cudaMemset((void*)d_image_conv, 0, SDATA_SIZE); - for (int nF = 0; nF < DATA_F; nF++){ - convolutionRowGPU_byframe<<>>(d_image_conv, d_image,DATA_W, DATA_H, nF); - cutilCheckMsg("convolutionRowGPU() execution failed\n"); - } - - //column convolution - dim3 blockGridColumns(iDivUp(DATA_W, COLUMN_TILE_W),iDivUp(DATA_H, COLUMN_TILE_H), 1); - dim3 threadBlockColumns(COLUMN_TILE_W, 8); - cudaMemset((void*)d_temp, 0, SDATA_SIZE); - for (int nF = 0; nF < DATA_F; nF++){ - convolutionColumnGPU<<>> - (d_temp, d_image_conv, DATA_W, DATA_H, COLUMN_TILE_W * threadBlockColumns.y,DATA_W * threadBlockColumns.y,nF); - cutilCheckMsg("convolutionColumnGPU() execution failed\n"); - } - - // frame convolution - dim3 blockGridFrames(iDivUp(DATA_W *DATA_H , COLUMN_TILE_W),iDivUp(DATA_F, COLUMN_TILE_H),1) ; - dim3 threadBlockFrames(COLUMN_TILE_W, 8); - cudaMemset((void*)d_image_conv, 0, SDATA_SIZE); - convolutionFrameGPU<<>> - (d_image_conv, d_temp, DATA_W *DATA_H, DATA_F, COLUMN_TILE_W * threadBlockFrames.y,DATA_W*DATA_H * threadBlockFrames.y); - cutilCheckMsg("convolutionFrameGPU() execution failed\n"); - - - cudaFree(d_temp); - - -} - -__global__ void myconv2dGPU(float *src, float *dest, float *kernel, int M, int N, int kn) -// [M,N] = size(src); kernel size = 2*kn +1 -// symmetric boundary condition -{ - - const int tid = (blockIdx.y*NBLOCKX + blockIdx.x)*blockDim.x + threadIdx.x; - - if (tid= M) - x0 = 2*M-1-x+2*i; - for(int j = -kn; j<=kn; j++) - { - y0 = y; - if(y-j<0) - y0 = -1+2*j-y; - if(y-j>=N) - y0 = 2*N-1-y+2*j; - sum += kernel[(j+kn)*(2*kn+1)+(i+kn)]*src[(y0-j)*M+(x0-i)]; - } - } - dest[tid] = sum; - - } - -} diff -Nru plastimatch-1.10.0+dfsg.1/src/register/cuda/viscous_finalize.cu plastimatch-1.10.0+dfsg.2/src/register/cuda/viscous_finalize.cu --- plastimatch-1.10.0+dfsg.1/src/register/cuda/viscous_finalize.cu 2024-09-16 23:22:25.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/src/register/cuda/viscous_finalize.cu 1970-01-01 00:00:00.000000000 +0000 @@ -1,128 +0,0 @@ -/******************************************************************* -c* Multimodal Deformable Image Registration * -c* via Mutual Information or Bhattacharyya Distantce * -c* Version: 1.0 * -c* Language: C, CUDA * -c* * -c* Developer: Yifei Lou * -c* Email: yifei.lou@ece.gatech.edu * -c* * -c* School of Electrical and Computer Engineering * -c* Georgia Institute of Technology * -c* Atlanta, GA, 30318 * -c* Website: http://groups.bme.gatech.edu/groups/bil/ * -c* * -c* Copyright (c) 2011 * -c* All rights reserved. * -c* * -c* Permission to use, copy, or modify this code and its * -c* documentation for scientific purpose is hereby granted * -c* without fee, provided that this copyright notice appear in * -c* all copies and that both that copyright notice and this * -c* permission notice appear in supporting documentation. The use * -c* for commercial purposes is prohibited without permission. * -c* * -c* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * -c* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * -c* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * -c* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * -c* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * -c* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * -c* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT * -c* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF* -c* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * -c* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * -c* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * -c* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * -c* THE POSSIBILITY OF SUCH DAMAGE. * -c* * -c******************************************************************/ - -/******************************************************************* -c* Short discription * -c* Finalize the reconstruction on the current scale and for the * -c* entire program, output results, release memory spaces for * -c* global variables, etc. * -c******************************************************************/ - -#include -#include -#include -#include -#include "viscous_global.h" - -void fina() -{ -// map output image to its original scale - nblocks.x = NBLOCKX; - nblocks.y = ((1 + (NX0*NY0*NZ0 - 1)/NTHREAD_PER_BLOCK) - 1) / NBLOCKX + 1; - - printf("moving image: max = %f, min = %f\n", max_im_move, min_im_move); - intensityRescale<<>>(d_im_move[0], max_im_move, min_im_move, -1); - -// output results - outputData(d_im_move[0], DATA_SIZE, outputfilename); - outputData(d_mv_x[0], DATA_SIZE, output_mv_x); - outputData(d_mv_y[0], DATA_SIZE, output_mv_y); - outputData(d_mv_z[0], DATA_SIZE, output_mv_z); - -// free up the host and device -// image pyramid - for(int scale =0; scale 0) - { - if(probx[i]>0 && proby[j]>0) - likelihood[tid] = log2f(likelihood[tid]/probx[i]/proby[j]); - else - likelihood[tid] = log2f(likelihood[tid]); - - jointHist[tid] = jointHist[tid]*likelihood[tid]; - } - - } - -} - -__global__ void marginalBnorm_sum(float *jointHist, float *probx, float *proby, float *Bsum) -{ - const int tid = (blockIdx.y*NBLOCKX + blockIdx.x)*blockDim.x + threadIdx.x; - if (tid=0) - x = ImageGradient(d_im_out[zmin*NX*NY + ymin*NX + (xmin-1) ], d_im_out[zmin*NX*NY +ymin*NX + xmin], d_im_out[zmin*NX*NY +ymin*NX + (xmin+1)]); - else x = 0; - - if(ymin+1 < NY && ymin-1>=0) - y = ImageGradient(d_im_out[zmin*NX*NY +(ymin-1)*NX + xmin], d_im_out[zmin*NX*NY +ymin*NX+ xmin], d_im_out[zmin*NX*NY +(ymin+1)*NX+xmin]); - else y = 0; - if(zmin+1 =0) - z = ImageGradient(d_im_out[(zmin-1)*NX*NY + ymin*NX + xmin], d_im_out[zmin*NX*NY + ymin*NX + xmin],d_im_out[(zmin+1)*NX*NY + ymin*NX + xmin]); - else z = 0; - - d_v_x[tid] = -ALPHA*dLx*x; - d_v_y[tid] = -ALPHA*dLx*y; - d_v_z[tid] = -ALPHA*dLx*z; - - -} -} - -__global__ void flowComp(float *d_mv_x, float *d_mv_y, float *d_mv_z, float *d_v_x, float *d_v_y, float *d_v_z, float *jacobian, float *flow, int NX, int NY, int NZ) -{ - const int tid = (blockIdx.y*NBLOCKX + blockIdx.x)*blockDim.x + threadIdx.x; - -if (tid=0) - { - u1x = ImageGradient(d_mv_x[z*NX*NY+y*NX+x-1], d_mv_x[z*NX*NY+y*NX+x], d_mv_x[z*NX*NY+y*NX+x+1]); - u2x = ImageGradient(d_mv_y[z*NX*NY+y*NX+x-1], d_mv_y[z*NX*NY+y*NX+x], d_mv_y[z*NX*NY+y*NX+x+1]); - u3x = ImageGradient(d_mv_z[z*NX*NY+y*NX+x-1], d_mv_z[z*NX*NY+y*NX+x], d_mv_y[z*NX*NY+y*NX+x+1]); - } - else - { - u1x = 0; - u2x = 0; - u3x = 0; - } - - if(y+1=0) - { - u1y = ImageGradient(d_mv_x[z*NX*NY+(y-1)*NX+x], d_mv_x[z*NX*NY+y*NX+x], d_mv_x[z*NX*NY+(y+1)*NX+x]); - u2y = ImageGradient(d_mv_y[z*NX*NY+(y-1)*NX+x], d_mv_y[z*NX*NY+y*NX+x], d_mv_y[z*NX*NY+(y+1)*NX+x]); - u3y = ImageGradient(d_mv_z[z*NX*NY+(y-1)*NX+x], d_mv_z[z*NX*NY+y*NX+x], d_mv_z[z*NX*NY+(y+1)*NX+x]); - } - else - { - u1y = 0; - u2y = 0; - u3y = 0; - } - - if(z+1=0) - { - u1z = ImageGradient(d_mv_x[(z-1)*NX*NY+y*NX+x], d_mv_x[z*NX*NY+y*NX+x], d_mv_x[(z+1)*NX*NY+y*NX+x]); - u2z = ImageGradient(d_mv_y[(z-1)*NX*NY+y*NX+x], d_mv_y[z*NX*NY+y*NX+x], d_mv_y[(z+1)*NX*NY+y*NX+x]); - u3z = ImageGradient(d_mv_z[(z-1)*NX*NY+y*NX+x], d_mv_z[z*NX*NY+y*NX+x], d_mv_z[(z+1)*NX*NY+y*NX+x]); - } - else - { - u1z = 0; - u2z = 0; - u3z = 0; - } - - float R1 = d_v_x[tid] - d_v_x[tid]*u1x - d_v_y[tid]*u1y - d_v_z[tid]*u1z; - float R2 = d_v_y[tid] - d_v_x[tid]*u2x - d_v_y[tid]*u2y - d_v_z[tid]*u2z; - float R3 = d_v_z[tid] - d_v_x[tid]*u3x - d_v_y[tid]*u3y - d_v_z[tid]*u3z; - - float jaco = (1.0f-u1x)*(1.0f-u2y)*(1.0f-u3z)-u3x*u1y*u2z - u2x*u3y*u1z - -(1.0f-u1x)*u3y*u2z - u3x*(1.0f-u2y)*u1z - u2x*u1y*(1.0f-u3z); - jacobian[tid] = (fabs(jaco)<= threshJaco) ? 1.0f : 0.0f; - - flow[tid] = sqrtf(R1 * R1 + R2 * R2 + R3 * R3); - -// d_v_x, d_v_y, d_v_z become displacement - d_v_x[tid] = R1; - d_v_y[tid] = R2; - d_v_z[tid] = R3; - -} -} - -__global__ void flowUpdate(float *d_mv_x, float *d_mv_y, float *d_mv_z, float *d_disp_x, float *d_disp_y, float *d_disp_z, float dt, int NX, int NY, int NZ) -{ - - const int tid = (blockIdx.y*NBLOCKX + blockIdx.x)*blockDim.x + threadIdx.x; - -if (tid0) - { - if(x>y) - return y; - else return x; - } - else - { - if(x>y) return x; - else return y; - } -} diff -Nru plastimatch-1.10.0+dfsg.1/src/register/cuda/viscous_initialize.cu plastimatch-1.10.0+dfsg.2/src/register/cuda/viscous_initialize.cu --- plastimatch-1.10.0+dfsg.1/src/register/cuda/viscous_initialize.cu 2024-09-16 23:22:25.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/src/register/cuda/viscous_initialize.cu 1970-01-01 00:00:00.000000000 +0000 @@ -1,267 +0,0 @@ -/******************************************************************* -c* Multimodal Deformable Image Registration * -c* via Mutual Information or Bhattacharyya Distantce * -c* Version: 1.0 * -c* Language: C, CUDA * -c* * -c* Developer: Yifei Lou * -c* Email: yifei.lou@ece.gatech.edu * -c* * -c* School of Electrical and Computer Engineering * -c* Georgia Institute of Technology * -c* Atlanta, GA, 30318 * -c* Website: http://groups.bme.gatech.edu/groups/bil/ * -c* * -c* Copyright (c) 2011 * -c* All rights reserved. * -c* * -c* Permission to use, copy, or modify this code and its * -c* documentation for scientific purpose is hereby granted * -c* without fee, provided that this copyright notice appear in * -c* all copies and that both that copyright notice and this * -c* permission notice appear in supporting documentation. The use * -c* for commercial purposes is prohibited without permission. * -c* * -c* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * -c* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * -c* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * -c* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * -c* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * -c* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * -c* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT * -c* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF* -c* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * -c* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * -c* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * -c* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * -c* THE POSSIBILITY OF SUCH DAMAGE. * -c* * -c******************************************************************/ - -/******************************************************************* -c* Short discription * -c* initialization of the entire program including * -c* data preprocessing, construction of image pyramid * -c* and Gaussian smoothing kernels * -c******************************************************************/ - -#include -#include -#include -#include -#include -#include "viscous_convolution.h" -#include "viscous_global.h" - -void dataPreprocessing(float *image, float *maxValue, float *minValue) -{ - thrust :: device_ptr data_ptr(image); - int maxInd = cublasIsamax(NX0*NY0*NZ0, image, 1) -1; - int minInd = cublasIsamin(NX0*NY0*NZ0, image, 1) -1; - *maxValue = data_ptr[maxInd]; - *minValue = data_ptr[minInd]; - - nblocks.x = NBLOCKX; - nblocks.y = ((1 + (NX0*NY0*NZ0 - 1)/NTHREAD_PER_BLOCK) - 1) / NBLOCKX + 1; - - intensityRescale<<>>(image, *maxValue, *minValue, 1); - -} - - -__global__ void intensityRescale(float *image, float maxValue, float minValue, int type) -// type > 0: forward -// type < 0: backward -{ - const int tid = (blockIdx.y*NBLOCKX + blockIdx.x)*blockDim.x + threadIdx.x; - - if(tid < NX0*NY0*NZ0) - { - if(type>0) - image[tid] = (image[tid] - minValue)/(maxValue-minValue); - else - image[tid] = image[tid]*(maxValue-minValue) + minValue; - - } -} - -__global__ void short2float(short *raw, float *image, int type) -{ - const int tid = (blockIdx.y*NBLOCKX + blockIdx.x)*blockDim.x + threadIdx.x; - - if(tid < NX0*NY0*NZ0) - { - if (type>0) - image[tid] = (float) raw[tid]; - else - raw[tid] = (short) image[tid]; - } - -} - -void initData() -{ - -// load static and moving images on host - float *h_im_static = (float*)malloc(DATA_SIZE); - loadData(h_im_static, DATA_SIZE, inputfilename_static); - float *h_im_move = (float *)malloc(DATA_SIZE); - loadData(h_im_move, DATA_SIZE, inputfilename_move); - - -// create image pyramid on device -// finest level 0 - cutilSafeCall( cudaMalloc((void**) &d_im_move[0], DATA_SIZE ) ); - cutilSafeCall( cudaMalloc((void**) &d_im_static[0],DATA_SIZE) ); - - cutilSafeCall( cudaMemcpy(d_im_static[0], h_im_static, DATA_SIZE, cudaMemcpyHostToDevice) ); - cutilSafeCall( cudaMemcpy(d_im_move[0], h_im_move, DATA_SIZE, cudaMemcpyHostToDevice) ); - - - free(h_im_static); - free(h_im_move); - -// scale intensity to [0, 1] - dataPreprocessing(d_im_static[0],&max_im_move, &min_im_move); - dataPreprocessing(d_im_move[0], &max_im_move, &min_im_move); - - - -// vector flow - cutilSafeCall( cudaMalloc((void **)&d_mv_x[0], DATA_SIZE) ); - cutilSafeCall( cudaMalloc((void **)&d_mv_y[0], DATA_SIZE) ); - cutilSafeCall( cudaMalloc((void **)&d_mv_z[0], DATA_SIZE) ); - - cutilSafeCall( cudaMemset(d_mv_x[0], 0, DATA_SIZE) ); - cutilSafeCall( cudaMemset(d_mv_y[0], 0, DATA_SIZE) ); - cutilSafeCall( cudaMemset(d_mv_z[0], 0, DATA_SIZE) ); - - -// coarse levels - for(int scale = 1; scale < NSCALE; scale ++) - { - NX = NX0/pow(2, scale); - NY = NY0/pow(2, scale); - NZ = (NZ0-1)/pow(2, scale) +1; - - sDATA_SIZE = sizeof(float)*NX*NY*NZ; - - cutilSafeCall( cudaMalloc((void**) &d_im_move[scale], sDATA_SIZE ) ); - cutilSafeCall( cudaMalloc((void**) &d_im_static[scale], sDATA_SIZE ) ); - - nblocks.x = NBLOCKX; - nblocks.y = ((1 + (NX*NY*NZ - 1)/NTHREAD_PER_BLOCK) - 1) / NBLOCKX + 1; - - int s = pow(2, scale); - - downSample<<>>(d_im_move[0], d_im_move[scale], NX, NY, NZ, s); - downSample<<>>(d_im_static[0], d_im_static[scale],NX, NY, NZ, s); - - cutilSafeCall( cudaMalloc((void **)&d_mv_x[scale], sDATA_SIZE ) ); - cutilSafeCall( cudaMalloc((void **)&d_mv_y[scale], sDATA_SIZE ) ); - cutilSafeCall( cudaMalloc((void **)&d_mv_z[scale], sDATA_SIZE ) ); - - cutilSafeCall( cudaMemset(d_mv_x[scale], 0, sDATA_SIZE ) ); - cutilSafeCall( cudaMemset(d_mv_y[scale], 0, sDATA_SIZE ) ); - cutilSafeCall( cudaMemset(d_mv_z[scale], 0, sDATA_SIZE ) ); - - - } - - - - std::cout << "Load data successfully.\n\n"; - - -// allocate space for histogram related variables - cutilSafeCall( cudaMalloc(&d_jointHistogram, HIST_SIZE) ); - cutilSafeCall( cudaMalloc(&d_jointHistogram_conv, HIST_SIZE) ); - cutilSafeCall( cudaMalloc((void **)&d_probx, sizeof(float)*nBin) ); - cutilSafeCall( cudaMalloc((void **)&d_proby, sizeof(float)*nBin) ); - cutilSafeCall( cudaMalloc((void **)&d_Bsum,sizeof(float)*nBin ) ); - -} - - - - - -void initGaussKernel() -{ - std::cout << "Initializing Gaussian kernel..." << std::endl; - float *h_GaussKernelH = (float *)malloc(sizeof(float)*(6*hValue+1)*(6*hValue+1)); - float *h_GaussKernelHx = (float *)malloc(sizeof(float)*(6*hValue+1)*(6*hValue+1)); - cutilSafeCall( cudaMalloc(&GaussKernelH, sizeof(float)*(6*hValue+1)*(6*hValue+1) ) ); - cutilSafeCall( cudaMalloc(&GaussKernelHx, sizeof(float)*(6*hValue+1)*(6*hValue+1) ) ); - - - float sum = 0.0; - for(int i = -3*hValue; i <= 3*hValue; i++) - { - for(int j = -3*hValue; j <= 3*hValue; j++) - { - - float temp = expf(-(i*i+j*j)/2.0/hValue/hValue); - h_GaussKernelH[(i+3*hValue)+(j+3*hValue)*(6*hValue+1)] = temp; - sum += temp; - - h_GaussKernelHx[(i+3*hValue)+(j+3*hValue)*(6*hValue+1)] = -i*temp/hValue/hValue; - - } - } - for(int i = -3*hValue; i <= 3*hValue; i++) - { - for(int j = -3*hValue; j <= 3*hValue; j++) - { - h_GaussKernelH[(i+3*hValue)+(j+3*hValue)*(6*hValue+1)] /= sum; - h_GaussKernelHx[(i+3*hValue)+(j+3*hValue)*(6*hValue+1)] /= sum; - } - } - - cutilSafeCall( cudaMemcpy(GaussKernelH, h_GaussKernelH, sizeof(float)*(6*hValue+1)*(6*hValue+1), cudaMemcpyHostToDevice) ); - cutilSafeCall( cudaMemcpy(GaussKernelHx, h_GaussKernelHx, sizeof(float)*(6*hValue+1)*(6*hValue+1), cudaMemcpyHostToDevice) ); - - - - - - free(h_GaussKernelH); - free(h_GaussKernelHx); - - float *h_GaussKernel1D = (float *)malloc(sizeof(float)*KERNEL_W); - float sumK = 0.0; - for(int i = 0; i < KERNEL_W; i++) - { - h_GaussKernel1D[i] = expf(-(i-KERNEL_RADIUS)*(i-KERNEL_RADIUS)/2.0/sValue/sValue); - sumK += h_GaussKernel1D[i]; - } - - - for(int i = 0; i < KERNEL_W; i++) - h_GaussKernel1D[i] /= sumK; - - cudaMemcpyToSymbol(d_Kernel, h_GaussKernel1D, KERNEL_W * sizeof(float)); - - - free(h_GaussKernel1D); - -} - -void loadData(float *dest, int sizeInByte, const char *filename) -// load data -{ - FILE *fp = fopen(filename,"rb"); - if( fp == NULL ) - { - printf(" File \'%s\' could not be opened!\n", filename); - exit(1); - } - - else{ - printf("Reading File \'%s\' ... \n", filename); - - size_t temp = fread(dest, 1, sizeInByte, fp); - fclose(fp); - } - -} diff -Nru plastimatch-1.10.0+dfsg.1/src/register/cuda/viscous_main.cu plastimatch-1.10.0+dfsg.2/src/register/cuda/viscous_main.cu --- plastimatch-1.10.0+dfsg.1/src/register/cuda/viscous_main.cu 2024-09-16 23:22:25.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/src/register/cuda/viscous_main.cu 1970-01-01 00:00:00.000000000 +0000 @@ -1,296 +0,0 @@ -/******************************************************************* -c* Multimodal Deformable Image Registration * -c* via Mutual Information or Bhattacharyya Distantce * -c* Version: 1.0 * -c* Language: C, CUDA * -c* * -c* Developer: Yifei Lou * -c* Email: yifei.lou@ece.gatech.edu * -c* * -c* School of Electrical and Computer Engineering * -c* Georgia Institute of Technology * -c* Atlanta, GA, 30318 * -c* Website: http://groups.bme.gatech.edu/groups/bil/ * -c* * -c* Copyright (c) 2011 * -c* All rights reserved. * -c* * -c* Permission to use, copy, or modify this code and its * -c* documentation for scientific purpose is hereby granted * -c* without fee, provided that this copyright notice appear in * -c* all copies and that both that copyright notice and this * -c* permission notice appear in supporting documentation. The use * -c* for commercial purposes is prohibited without permission. * -c* * -c* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * -c* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * -c* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * -c* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * -c* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * -c* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * -c* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT * -c* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF* -c* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * -c* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * -c* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * -c* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * -c* THE POSSIBILITY OF SUCH DAMAGE. * -c* * -c******************************************************************/ - -/******************************************************************* -c* Short discription * -c* main code of the multi-modal deformable registration * -c* it calls all the other components * -c******************************************************************/ - - - -// includes, system -#include -#include -#include -#include - - -#define GCS_REPRESS_EXTERNS -// includes, gloable variables -#include "viscous_global.h" -//#include "convolution.cu" -#include "viscous_cuda.h" - -// includes, project -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include // for float2 - -using namespace std; -using namespace thrust; - -// include files -//#include "initialize.cu" -//#include "funcHistogram.cu" -//#include "funcImageDomain.cu" -//#include "compute.cu" -//#include "finalize.cu" - - -/* Global variables */ -/*************************************** -* global variables declaration -***************************************/ -char inputfilename_move[100] = "./inputDTI.raw"; -// image move -char inputfilename_static[100] = "./inputT2.raw"; -// image static -char outputfilename[100] = "./outputDTI.raw"; -// image out -char output_mv_x[100] = "./output_xFlow.dat"; -char output_mv_y[100] = "./output_yFlow.dat"; -char output_mv_z[100] = "./output_zFlow.dat"; - -float *h_im_static, *h_im_move; -// image pyramid -float *d_im_static[NSCALE], *d_im_move[NSCALE]; -// vector flow -float *d_mv_x[NSCALE], *d_mv_y[NSCALE], *d_mv_z[NSCALE]; - - -// gaussian kernel -float *GaussKernelH, *GaussKernelHx; - -// histogram related -float *d_jointHistogram; -float *d_jointHistogram_conv; -float *d_probx, *d_proby; -float *d_Bsum; - -dim3 nblocks; -dim3 nblocks_hist; - -int NX, NY, NZ, sDATA_SIZE; -// dimension at current pyramid level -float max_im_move, min_im_move; -// max and min intensity of the moving image - - -cudaArray *d_im_move_array; -texture d_im_move_tex; - - -cudaArray *d_mv_x_array, *d_mv_y_array, *d_mv_z_array; -texture d_mv_x_tex; -texture d_mv_y_tex; -texture d_mv_z_tex; - -int deviceCount; -cudaDeviceProp dP; - -/**************************************************** - main program -****************************************************/ -int -CUDA_viscous_main ( - int argc, - char** argv -) -{ - cout << endl << "****************************************" << endl; - cout << "Computation parameters..." << endl; - cout << "****************************************" << endl ; - - int device = DEVICENUMBER; -// device number - - cudaSetDevice(device); - cout << "Using device # " << device << endl; -// choose which device to use - - cudaGetDeviceCount(&deviceCount); - cudaGetDeviceProperties(&dP,device); - cout<<"Max threads per block: "<=0; scale--) - { - NX = NX0/pow(2, scale); - NY = NY0/pow(2, scale); - NZ = (NZ0-1)/pow(2, scale) + 1; - - sDATA_SIZE = (NX*NY*NZ)* sizeof(float); - - nblocks.x = NBLOCKX; - nblocks.y = ((1 + (NX*NY*NZ - 1)/NTHREAD_PER_BLOCK) - 1) - / NBLOCKX + 1; - printf ("current scale = %d, size of image = %d x %d x %d ... \n", - scale, NX, NY, NZ); - if(scale>>( - d_mv_x[scale+1], d_mv_x[scale], NX, NY, NZ); - upSample<<>>( - d_mv_y[scale+1], d_mv_y[scale], NX, NY, NZ); - upSample<<>>( - d_mv_z[scale+1], d_mv_z[scale], NX, NY, NZ); - } - compute ( - d_im_move[scale], d_im_static[scale], - d_mv_x[scale], d_mv_y[scale], d_mv_z[scale], MAX_ITER); - printf("\n\n"); - } - - cudaThreadSynchronize(); -#if defined (commentout) - cutilCheckError( cutStopTimer( timer)); - printf("\n\n****************************************\n"); - printf( "Computing time: %f (ms)\n", cutGetTimerValue( timer)); - printf("****************************************\n\n\n"); - cutilCheckError( cutDeleteTimer( timer)); -#endif -// mark the end timer and print - -/****************************************************** - finalize -******************************************************/ - - printf("Finalizing program...\n\n"); - - fina(); - -/**** shut down CBLAS ********/ - - status = cublasShutdown(); - if (status != CUBLAS_STATUS_SUCCESS) - { - fprintf (stderr, "!!!! shutdown error (A)\n"); - getchar(); - exit(0); - } -// Shut down CUBLAS - - cudaThreadSynchronize(); - -// mark the end total timer -#if defined (commentout) - cutilCheckError( cutStopTimer( totalTimer)); - printf("\n\n****************************************\n"); - printf( "Entire program time: %f (ms)\n", cutGetTimerValue( totalTimer)); - printf("****************************************\n\n\n"); - cutilCheckError( cutDeleteTimer( totalTimer)); -#endif - - printf("Have a nice day!\n"); - - cudaThreadExit(); - -#if defined (commentout) - cutilExit(argc, argv); -#endif - return 0; -} diff -Nru plastimatch-1.10.0+dfsg.1/src/register/viscous_convolution.h plastimatch-1.10.0+dfsg.2/src/register/viscous_convolution.h --- plastimatch-1.10.0+dfsg.1/src/register/viscous_convolution.h 2024-09-16 23:22:25.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/src/register/viscous_convolution.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,125 +0,0 @@ -/******************************************************************* -c* Multimodal Deformable Image Registration * -c* via Mutual Information or Bhattacharyya Distantce * -c* Version: 1.0 * -c* Language: C, CUDA * -c* * -c* Developer: Yifei Lou * -c* Email: yifei.lou@ece.gatech.edu * -c* * -c* School of Electrical and Computer Engineering * -c* Georgia Institute of Technology * -c* Atlanta, GA, 30318 * -c* Website: http://groups.bme.gatech.edu/groups/bil/ * -c* * -c* Copyright (c) 2011 * -c* All rights reserved. * -c* * -c* Permission to use, copy, or modify this code and its * -c* documentation for scientific purpose is hereby granted * -c* without fee, provided that this copyright notice appear in * -c* all copies and that both that copyright notice and this * -c* permission notice appear in supporting documentation. The use * -c* for commercial purposes is prohibited without permission. * -c* * -c* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * -c* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * -c* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * -c* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * -c* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * -c* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * -c* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT * -c* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF* -c* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * -c* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * -c* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * -c* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * -c* THE POSSIBILITY OF SUCH DAMAGE. * -c* * -c******************************************************************/ - - - -#ifndef _3DImageSmooth_H_ -#define _3DImageSmooth_H_ - -#define KERNEL_RADIUS 8 -const int KERNEL_W = (2 * KERNEL_RADIUS + 1); - - -// Assuming ROW_TILE_W, KERNEL_RADIUS_ALIGNED and dataW -// are multiples of coalescing granularity size, -// all global memory operations are coalesced in convolutionRowGPU() -#define ROW_TILE_W 128 -#define KERNEL_RADIUS_ALIGNED 16 - - -// Assuming COLUMN_TILE_W and dataW are multiples -// of coalescing granularity size, all global memory operations -// are coalesced in convolutionColumnGPU() -#define COLUMN_TILE_W 16 -#define COLUMN_TILE_H 48 - - -//#define UNROLL_INNER // for fast convolution - - -//////////////////////////////////////////////////////////////////////////////// -// Common host and device functions -//////////////////////////////////////////////////////////////////////////////// -//Round a / b to nearest higher integer value -inline int iDivUp(int a, int b){ - return (a % b != 0) ? (a / b + 1) : (a / b); -} - -//24-bit multiplication is faster on G80, -//but we must be sure to multiply integers -//only within [-8M, 8M - 1] range -#define IMUL(a, b) __mul24(a, b) - - -__device__ __constant__ float d_Kernel[KERNEL_W]; - - -//////////////////////////////////////////////////////////////////////////////// -// Loop unrolling templates, needed for best performance -//////////////////////////////////////////////////////////////////////////////// - -template __device__ float convolutionRow(float *data){ - return - data[KERNEL_RADIUS - i] * d_Kernel[i] - + convolutionRow(data); -} - -template<> __device__ float convolutionRow<-1>(float *data){ - return 0; -} - -template __device__ float convolutionColumn(float *data){ - return - data[(KERNEL_RADIUS - i) * COLUMN_TILE_W] * d_Kernel[i] - + convolutionColumn(data); -} - -template<> __device__ float convolutionColumn<-1>(float *data){ - return 0; -} - - -//////////////////////////////////////////////////////////////////////////////// -// GPU convolution -//////////////////////////////////////////////////////////////////////////////// - -//subroutines - -void ImageSmooth(float *d_image, float *d_image_conv, int3 Dims); -__global__ void myconv2dGPU(float *src, float *dest, float *kernel, int M, int N, int kn); - -#endif - - - - - - - diff -Nru plastimatch-1.10.0+dfsg.1/src/register/viscous_global.h plastimatch-1.10.0+dfsg.2/src/register/viscous_global.h --- plastimatch-1.10.0+dfsg.1/src/register/viscous_global.h 2024-09-16 23:22:25.000000000 +0000 +++ plastimatch-1.10.0+dfsg.2/src/register/viscous_global.h 1970-01-01 00:00:00.000000000 +0000 @@ -1,211 +0,0 @@ -/******************************************************************* -c* Multimodal Deformable Image Registration * -c* via Mutual Information or Bhattacharyya Distantce * -c* Version: 1.0 * -c* Language: C, CUDA * -c* * -c* Developer: Yifei Lou * -c* Email: yifei.lou@ece.gatech.edu * -c* * -c* School of Electrical and Computer Engineering * -c* Georgia Institute of Technology * -c* Atlanta, GA, 30318 * -c* Website: http://groups.bme.gatech.edu/groups/bil/ * -c* * -c* Copyright (c) 2011 * -c* All rights reserved. * -c* * -c* Permission to use, copy, or modify this code and its * -c* documentation for scientific purpose is hereby granted * -c* without fee, provided that this copyright notice appear in * -c* all copies and that both that copyright notice and this * -c* permission notice appear in supporting documentation. The use * -c* for commercial purposes is prohibited without permission. * -c* * -c* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * -c* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * -c* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * -c* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * -c* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * -c* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * -c* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES INCLUDING, BUT NOT * -c* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF* -c* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * -c* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * -c* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * -c* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * -c* THE POSSIBILITY OF SUCH DAMAGE. * -c* * -c******************************************************************/ -#ifndef _viscous_global_h_ -#define _viscous_global_h_ - -/************************************** -* CUDA parameters -**************************************/ -#define NTHREAD_PER_BLOCK 256 -// number of threads per block along each direction - -#define NBLOCKX 1024 -// the leading dimension of the 2d thread grid - -#define DEVICENUMBER 0 -// device number to be used - - - -/************************************** -* parameters -**************************************/ -#define NX0 256 -#define NY0 256 -#define NZ0 64 -#define DATA_SIZE ((NX0*NY0*NZ0)*sizeof(float)) -#define NSCALE 2 -// number of scales in the image pyramid: 0 is the finest, NSCALE-1 is the coarest - - -#define MAX_ITER 20 -// max # of iterations in the finest grid -// # of iterations at each level = 2^scale*MAX_ITER - - - -// histogram related parameters -#define nBin 256 -// number of histogram bins - -#define hValue 4 -// int, parameter in the Gaussian for convolution histogram -#define HIST_SIZE (nBin*nBin*sizeof(float)) - - -#define sValue 3 -// int, parameter in the Gaussian for updating velocity -#define sLength (6*sValue+1) - - -// image domain parameters -#define ALPHA 500 -// parameter in the force calculation - -#define du 0.6 -// parameter to dynamically define dt - -#define METHOD 1 -// 1 for Bnorm, 2 for MI - -#define threshJaco 0.5 -// threshold for Jacobian to regridding - - -#define EPS 0.000001 - - -/*************************************** -* global variables declaration -***************************************/ -#ifndef GCS_REPRESS_EXTERNS -extern char inputfilename_move[100]; -// image move -extern char inputfilename_static[100]; -// image static -extern char outputfilename[100]; -// image out -extern char output_mv_x[100]; -extern char output_mv_y[100]; -extern char output_mv_z[100]; - - -extern float *h_im_static, *h_im_move; -// image pyramid -extern float *d_im_static[NSCALE], *d_im_move[NSCALE]; -// vector flow -extern float *d_mv_x[NSCALE], *d_mv_y[NSCALE], *d_mv_z[NSCALE]; - - -// gaussian kernel -extern float *GaussKernelH, *GaussKernelHx; - -// histogram related -extern float *d_jointHistogram; -extern float *d_jointHistogram_conv; -extern float *d_probx, *d_proby; -extern float *d_Bsum; - -extern dim3 nblocks; -extern dim3 nblocks_hist; - -extern int NX, NY, NZ, sDATA_SIZE; -// dimension at current pyramid level -extern float max_im_move, min_im_move; -// max and min intensity of the moving image - - -extern cudaArray *d_im_move_array; -extern texture d_im_move_tex; - - -extern cudaArray *d_mv_x_array, *d_mv_y_array, *d_mv_z_array; -extern texture d_mv_x_tex; -extern texture d_mv_y_tex; -extern texture d_mv_z_tex; - -extern int deviceCount; -extern cudaDeviceProp dP; -// device properties - -#endif /* GCS_REPRESS_EXTERNS */ - -/************************************* - -* simple math functions - -***************************************/ -__host__ __device__ float minmod(float x, float y); -__device__ float ImageGradient(float Im, float I, float Ip); - - -/**************************************** -* Data processing -****************************************/ -void dataPreprocessing(float *image, float *maxValue, float *minValue); -__global__ void intensityRescale(float *image, float maxValue, float minValue, int type); -// type >0 forward calculation: rescale image intensity to [0,1] -// type <0 backward: map intensity to its original scale - -void loadData(float *dest, int sizeInByte, const char *filename); -void outputData(void *src, int size, const char *outputfilename); - -/*************************************** -* function declaration -***************************************/ -void initData(); -void initGaussKernel(); -void fina(); -void compute(float *d_im_move, float *d_im_static, float *d_mv_x, float *d_mv_y, float *d_mv_z, int maxIter); - - -/**************************************** -* kernel declaration -****************************************/ -__global__ void upSample(float *src, float *dest, int NX, int NY, int NZ); -__global__ void downSample(float *src, float *dest, int NX, int NY, int NZ, int s); - -__global__ void ImageWarp(float *mv_x, float *mv_y, float *mv_z, float *dest, int NX, int NY, int NZ); -__global__ void ImageWarp_mv(float *mv_x, float *mv_y, float *mv_z, int NX, int NY, int NZ); -__global__ void ImageWarp_final(float *mv_x, float *mv_y, float *mv_z, float *dest, int NX, int NY, int NZ); - -__global__ void forceComp(float *d_im_out, float *d_im_static, float *d_Likelihood, float *d_v_x, float *d_v_y, float *d_v_z, int NX, int NY, int NZ); -__global__ void flowComp(float *d_mv_x, float *d_mv_y, float *d_mv_z, float *d_v_x, float *d_v_y, float *d_v_z, float *jacobian, float *flow, int NX, int NY, int NZ); -__global__ void flowUpdate(float *d_mv_x, float *d_mv_y, float *d_mv_z, float *d_disp_x, float *d_disp_y, float *d_disp_z, float dt, int NX, int NY, int NZ); - -__global__ void marginalDist(float *jointHist, float *probx, float *proby); -__global__ void mutualInfoGPU(float *jointHist, float *probx, float *proby, float *likelihood); -__global__ void copyHist(unsigned int *hist, float *jointHist); -__global__ void marginalBnorm_sum(float *jointHist, float *probx, float *proby, float *Bsum); -__global__ void marginalDistAlongY(float *jointHist, float *dest); -__global__ void BnormGPU(float *jointHist, float *probx, float *proby, float *Bsum, float *likelihood); -__global__ void transToFloat2(const float *input1, const float *input2, float2 *output, const int n); - -#endif