PSkel
|
00001 //------------------------------------------------------------------------------- 00002 // Copyright (c) 2015, ICEI - PUC Minas 00003 // All rights reserved. 00004 // 00005 // Redistribution and use in source and binary forms, with or without 00006 // modification, are permitted provided that the following conditions are met: 00007 // 00008 // 1. Redistributions of source code must retain the above copyright notice, this 00009 // list of conditions and the following disclaimer. 00010 // 00011 // 2. Redistributions in binary form must reproduce the above copyright notice, 00012 // this list of conditions and the following disclaimer in the documentation 00013 // and/or other materials provided with the distribution. 00014 // 00015 // 3. Neither the name of the copyright holder nor the names of its contributors 00016 // may be used to endorse or promote products derived from this software without 00017 // specific prior written permission. 00018 // 00019 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 00020 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00021 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00022 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 00023 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00024 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 00025 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 00026 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 00027 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00028 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 //------------------------------------------------------------------------------- 00030 00031 #ifndef PSKEL_ARGS_HPP 00032 #define PSKEL_ARGS_HPP 00033 00034 namespace PSkel{ 00035 00036 //******************************************************************************************* 00037 // ARGS 00038 //******************************************************************************************* 00039 00040 template<typename T> 00041 Args<T>::Args(){}; 00042 00043 /* 00044 __stencil__ ~Args(){ 00045 gpuErrchk( cudaFreeHost(hostArray)); 00046 } 00047 */ 00048 00049 template<typename T> 00050 Args<T>::Args(int _width){ 00051 width = _width; 00052 gpuErrchk( cudaDeviceReset() ); 00053 gpuErrchk( cudaSetDeviceFlags(cudaDeviceMapHost) ); 00054 gpuErrchk( cudaHostAlloc((void **) &hostArray, width*sizeof(T), cudaHostAllocWriteCombined | cudaHostAllocMapped) ); 00055 gpuErrchk( cudaHostGetDevicePointer(&deviceArray, hostArray, 0) ); 00056 } 00057 00058 template<typename T> 00059 int Args<T>::getWidth() const{ 00060 return width; 00061 } 00062 00063 template<typename T> 00064 T & Args<T>::operator()(int x) const { 00065 #ifdef __CUDA_ARCH__ 00066 return deviceArray[x]; 00067 #else 00068 return hostArray[x]; 00069 #endif 00070 } 00071 00072 //******************************************************************************************* 00073 // ARGS2D 00074 //******************************************************************************************* 00075 00076 template<typename T> 00077 Args2D<T>::Args2D(){}; 00078 00079 /* 00080 __stencil__ ~Args(){ 00081 gpuErrchk( cudaFreeHost(hostArray)); 00082 } 00083 */ 00084 00085 template<typename T> 00086 Args2D<T>::Args2D(int _width,int _height){ 00087 width = _width; 00088 height = _height; 00089 gpuErrchk( cudaHostAlloc((void **) &hostArray, width*height*sizeof(T), cudaHostAllocWriteCombined | cudaHostAllocMapped) ); 00090 gpuErrchk( cudaHostGetDevicePointer(&deviceArray, hostArray, 0) ); 00091 } 00092 00093 template<typename T> 00094 int Args2D<T>::getWidth() const{ 00095 return width; 00096 } 00097 00098 template<typename T> 00099 int Args2D<T>::getHeight() const{ 00100 return height; 00101 } 00102 00103 template<typename T> 00104 T & Args2D<T>::operator()(int x,int y) const { 00105 #ifdef __CUDA_ARCH__ 00106 return deviceArray[y*width+x]; 00107 #else 00108 return hostArray[y*width+x]; 00109 #endif 00110 } 00111 00112 //******************************************************************************************* 00113 // ARGS3D 00114 //******************************************************************************************* 00115 00116 template<typename T> 00117 Args3D<T>::Args3D(){}; 00118 00119 /* 00120 __stencil__ ~Args(){ 00121 gpuErrchk( cudaFreeHost(hostArray)); 00122 } 00123 */ 00124 00125 template<typename T> 00126 Args3D<T>::Args3D(int _width, int _height, int _depth){ 00127 width = _width; 00128 height = _height; 00129 depth = _depth; 00130 gpuErrchk( cudaHostAlloc((void **) &hostArray, width*height*depth*sizeof(T), cudaHostAllocWriteCombined | cudaHostAllocMapped) ); 00131 gpuErrchk( cudaHostGetDevicePointer(&deviceArray, hostArray, 0) ); 00132 } 00133 00134 template<typename T> 00135 int Args3D<T>::getWidth() const{ 00136 return width; 00137 } 00138 00139 template<typename T> 00140 int Args3D<T>::getHeight() const{ 00141 return height; 00142 } 00143 00144 template<typename T> 00145 int Args3D<T>::getDepth() const{ 00146 return depth; 00147 } 00148 00149 template<typename T> 00150 T & Args3D<T>::operator()(int x,int y,int z) const { 00151 #ifdef __CUDA_ARCH__ 00152 return deviceArray[(z*height + y)*width + x]; 00153 #else 00154 return hostArray[(z*height + y)*width + x]; 00155 #endif 00156 } 00157 00158 }//end namespace 00159 00160 #endif