From 6fc7b5590903320dbcd8fa03e1cadcad7fd41cd5 Mon Sep 17 00:00:00 2001 From: Martin Bauer <martin.bauer@fau.de> Date: Sat, 20 May 2017 14:26:09 +0200 Subject: [PATCH] CUDA support --- CMakeLists.txt | 41 +++ apps/tutorials/CMakeLists.txt | 1 + apps/tutorials/cuda/01_GameOfLife_cuda.cpp | 134 +++++++++ apps/tutorials/cuda/01_GameOfLife_cuda.dox | 139 +++++++++ apps/tutorials/cuda/01_GameOfLife_kernels.cu | 40 +++ apps/tutorials/cuda/01_GameOfLife_kernels.h | 12 + apps/tutorials/cuda/CMakeLists.txt | 7 + apps/tutorials/cuda/GosperGliderGun.png | Bin 0 -> 228 bytes cmake/waLBerlaFunctions.cmake | 18 +- src/cuda/AddGPUFieldToStorage.h | 72 +++++ src/cuda/AddGPUFieldToStorage.impl.h | 91 ++++++ src/cuda/CMakeLists.txt | 9 + src/cuda/ErrorChecking.h | 53 ++++ src/cuda/FieldAccessor.h | 112 +++++++ src/cuda/FieldAccessorXYZ.h | 79 +++++ src/cuda/FieldCopy.h | 209 +++++++++++++ src/cuda/FieldIndexing.cpp | 225 ++++++++++++++ src/cuda/FieldIndexing.h | 92 ++++++ src/cuda/FieldIndexingXYZ.cpp | 123 ++++++++ src/cuda/FieldIndexingXYZ.h | 79 +++++ src/cuda/GPUField.cpp | 187 ++++++++++++ src/cuda/GPUField.h | 122 ++++++++ src/cuda/GPUTypesExplicitInstantiation.h | 8 + src/cuda/HostFieldAllocator.h | 81 +++++ src/cuda/Kernel.h | 294 +++++++++++++++++++ src/cuda/doc/cuda.dox | 80 +++++ src/cuda/doc/drawing.svg | 285 ++++++++++++++++++ src/cuda/doc/fieldAccess.png | Bin 0 -> 46057 bytes src/cuda/ideasForCommunication.txt | 40 +++ src/field/Field.h | 22 +- src/field/Field.impl.h | 26 -- src/field/Layout.h | 43 +++ src/field/communication/MPIDatatypes.h | 66 ++--- src/field/communication/MPIDatatypes.impl.h | 69 ++--- src/field/iterators/FieldIterator.h | 11 +- tests/CMakeLists.txt | 1 + tests/cuda/CMakeLists.txt | 15 + tests/cuda/CudaMPI.cpp | 144 +++++++++ tests/cuda/FieldTransferTest.cpp | 65 ++++ tests/cuda/Kernels.cu | 33 +++ tests/cuda/SimpleKernelTest.cpp | 115 ++++++++ 41 files changed, 3126 insertions(+), 117 deletions(-) create mode 100644 
apps/tutorials/cuda/01_GameOfLife_cuda.cpp create mode 100644 apps/tutorials/cuda/01_GameOfLife_cuda.dox create mode 100644 apps/tutorials/cuda/01_GameOfLife_kernels.cu create mode 100644 apps/tutorials/cuda/01_GameOfLife_kernels.h create mode 100644 apps/tutorials/cuda/CMakeLists.txt create mode 100644 apps/tutorials/cuda/GosperGliderGun.png create mode 100644 src/cuda/AddGPUFieldToStorage.h create mode 100644 src/cuda/AddGPUFieldToStorage.impl.h create mode 100644 src/cuda/CMakeLists.txt create mode 100644 src/cuda/ErrorChecking.h create mode 100644 src/cuda/FieldAccessor.h create mode 100644 src/cuda/FieldAccessorXYZ.h create mode 100644 src/cuda/FieldCopy.h create mode 100644 src/cuda/FieldIndexing.cpp create mode 100644 src/cuda/FieldIndexing.h create mode 100644 src/cuda/FieldIndexingXYZ.cpp create mode 100644 src/cuda/FieldIndexingXYZ.h create mode 100644 src/cuda/GPUField.cpp create mode 100755 src/cuda/GPUField.h create mode 100644 src/cuda/GPUTypesExplicitInstantiation.h create mode 100644 src/cuda/HostFieldAllocator.h create mode 100644 src/cuda/Kernel.h create mode 100644 src/cuda/doc/cuda.dox create mode 100644 src/cuda/doc/drawing.svg create mode 100644 src/cuda/doc/fieldAccess.png create mode 100644 src/cuda/ideasForCommunication.txt create mode 100644 src/field/Layout.h create mode 100644 tests/cuda/CMakeLists.txt create mode 100644 tests/cuda/CudaMPI.cpp create mode 100644 tests/cuda/FieldTransferTest.cpp create mode 100644 tests/cuda/Kernels.cu create mode 100644 tests/cuda/SimpleKernelTest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b5a05f3ad..5a1148ba7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,6 +81,8 @@ option ( WALBERLA_BUILD_WITH_PYTHON_LBM "Include LBM module into python modu option ( WALBERLA_BUILD_WITH_LIKWID_MARKERS "Compile in markers for likwid-perfctr" ) +option ( WALBERLA_BUILD_WITH_CUDA "Enable CUDA support" ) + option ( WALBERLA_BUILD_WITH_FASTMATH "Fast math" ) @@ -1013,6 +1015,45 @@ endif() 
+############################################################################################################################ +## +## CUDA +## +############################################################################################################################ +if ( WALBERLA_BUILD_WITH_CUDA ) + # set ( BUILD_SHARED_LIBS ON ) + set ( CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON ) + set ( CUDA_PROPAGATE_HOST_FLAGS OFF CACHE BOOL "" ) + + if ( (NOT DEFINED CUDA_HOST_COMPILER) AND (${CMAKE_C_COMPILER} MATCHES "ccache") ) + string ( STRIP "${CMAKE_C_COMPILER_ARG1}" stripped_compiler_string ) + find_program ( CUDA_HOST_COMPILER ${stripped_compiler_string} ) + endif () + + find_package ( CUDA REQUIRED ) + + if ( CUDA_FOUND ) + include_directories ( ${CUDA_INCLUDE_DIRS} ) + list ( APPEND SERVICE_LIBS ${CUDA_LIBRARIES} ) + + if ( NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std=" ) + list ( APPEND CUDA_NVCC_FLAGS "-std=c++11" ) + endif () + + # Bug with gcc5 and cuda7.5: + #list( APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED -D_FORCE_INLINES -D__STRICT_ANSI__") + + # NOTICE: exisiting cuda flags are overwritten + #set ( CUDA_NVCC_FLAGS "--compiler-bindir=/usr/bin/g++-4.3" ) + #set ( CUDA_NVCC_FLAGS "-arch sm_20" ) + else() + set ( WALBERLA_BUILD_WITH_CUDA FALSE ) + endif ( ) +endif ( ) +############################################################################################################################ + + + ############################################################################################################################ ## ## Testing Coverage diff --git a/apps/tutorials/CMakeLists.txt b/apps/tutorials/CMakeLists.txt index 91dae24cd..1a6e321a6 100644 --- a/apps/tutorials/CMakeLists.txt +++ b/apps/tutorials/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(basics) +add_subdirectory(cuda) add_subdirectory(lbm) add_subdirectory(pde) add_subdirectory(pe) diff --git a/apps/tutorials/cuda/01_GameOfLife_cuda.cpp b/apps/tutorials/cuda/01_GameOfLife_cuda.cpp new file 
mode 100644 index 000000000..6f8da2197 --- /dev/null +++ b/apps/tutorials/cuda/01_GameOfLife_cuda.cpp @@ -0,0 +1,134 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file 03_GameOfLife.cpp +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "01_GameOfLife_kernels.h" +#include "cuda/HostFieldAllocator.h" +#include "blockforest/Initialization.h" +#include "blockforest/communication/UniformDirectScheme.h" + +#include "core/Environment.h" + +#include "cuda/HostFieldAllocator.h" +#include "cuda/FieldCopy.h" +#include "cuda/GPUField.h" +#include "cuda/Kernel.h" +#include "cuda/AddGPUFieldToStorage.h" + +#include "field/AddToStorage.h" +#include "field/communication/UniformMPIDatatypeInfo.h" + +#include "geometry/initializer/ScalarFieldFromGrayScaleImage.h" +#include "geometry/structured/GrayScaleImage.h" + +#include "gui/Gui.h" + +#include "stencil/D2Q9.h" + +#include "timeloop/SweepTimeloop.h" + + +using namespace walberla; + +typedef GhostLayerField<double,1> ScalarField; +typedef cuda::GPUField<double> GPUField; + + +ScalarField * createField( IBlock* const block, 
StructuredBlockStorage* const storage ) +{ + return new ScalarField ( + storage->getNumberOfXCells( *block ), // number of cells in x direction per block + storage->getNumberOfYCells( *block ), // number of cells in y direction per block + storage->getNumberOfZCells( *block ), // number of cells in z direction per block + 1, // one ghost layer + real_t(0), // initial value + field::fzyx, // layout + make_shared<cuda::HostFieldAllocator<double> >() // allocator for host pinned memory + ); +} + +class GameOfLifeSweepCUDA +{ + public: + GameOfLifeSweepCUDA( BlockDataID gpuFieldSrcID, BlockDataID gpuFieldDstID ) + : gpuFieldSrcID_( gpuFieldSrcID ), gpuFieldDstID_( gpuFieldDstID ) + { + } + void operator() ( IBlock * block ) + { + auto srcCudaField = block->getData< cuda::GPUField<real_t> > ( gpuFieldSrcID_ ); + auto dstCudaField = block->getData< cuda::GPUField<real_t> > ( gpuFieldDstID_ ); + + auto myKernel = cuda::make_kernel( &gameOfLifeKernel ); + myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *srcCudaField ) ); + myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *dstCudaField ) ); + myKernel(); + + srcCudaField->swapDataPointers( dstCudaField ); + } + private: + BlockDataID gpuFieldSrcID_; + BlockDataID gpuFieldDstID_; +}; + + +int main( int argc, char ** argv ) +{ + walberla::Environment env( argc, argv ); + + geometry::GrayScaleImage image ("GosperGliderGun.png"); + + // Create blocks + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + uint_t(1) , uint_t(2), uint_t(1), // number of blocks in x,y,z direction + image.size( uint_t(0) ), image.size( uint_t(1) ) / uint_t(2), uint_t(1), // how many cells per block (x,y,z) + real_t(1), // dx: length of one cell in physical coordinates + false, // one block per process - "false" means all blocks to one process + false, false, false ); // no periodicity + + + BlockDataID cpuFieldID = blocks->addStructuredBlockData<ScalarField>( &createField, "CPU 
Field" ); + + // Initializing the field from an image + using geometry::initializer::ScalarFieldFromGrayScaleImage; + ScalarFieldFromGrayScaleImage fieldInitializer ( *blocks, cpuFieldID ) ; + fieldInitializer.init( image, uint_t(2), false ); + + BlockDataID gpuFieldSrcID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" ); + BlockDataID gpuFieldDstID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Dst" ); + + typedef blockforest::communication::UniformDirectScheme<stencil::D2Q9 > CommScheme; + CommScheme communication( blocks ); + communication.addDataToCommunicate( make_shared<field::communication::UniformMPIDatatypeInfo<GPUField> > (gpuFieldSrcID) ); + + // Create Timeloop + const uint_t numberOfTimesteps = uint_t(10); // number of timesteps for non-gui runs + SweepTimeloop timeloop ( blocks, numberOfTimesteps ); + + // Registering the sweep + timeloop.add() << BeforeFunction( communication, "Communication" ) + << Sweep( GameOfLifeSweepCUDA(gpuFieldSrcID, gpuFieldDstID ), "GameOfLifeSweep" ); + + timeloop.add() << Sweep( cuda::fieldCpyFunctor<ScalarField, GPUField >(cpuFieldID, gpuFieldDstID) ); + + GUI gui ( timeloop, blocks, argc, argv ); + gui.run(); + + return 0; +} diff --git a/apps/tutorials/cuda/01_GameOfLife_cuda.dox b/apps/tutorials/cuda/01_GameOfLife_cuda.dox new file mode 100644 index 000000000..4e654d4c0 --- /dev/null +++ b/apps/tutorials/cuda/01_GameOfLife_cuda.dox @@ -0,0 +1,139 @@ +namespace walberla{ + +/** +\page tutorial_cuda01 Tutorial - CUDA 1: Game of Life on GPU + + +\image html tutorial_cuda01_nvidia_titan.png + +> _Note:_ This tutorial required a CUDA aware MPI library. +> If you get a SEGFAULT when executing this tutorial, make sure that your MPI library was built with +> CUDA support! For instructions how to build OpenMPI with CUDA see this [page](https://www.open-mpi.org/faq/?category=building#build-cuda). 
+ +\section cuda01_fields Creating Fields + +To run a simulation on an NVIDIA graphics card, we have to allocate data on the GPU and +write a CUDA kernel that operates on this data. In this tutorial we first allocate a field on the GPU +and learn about functionality to transfer data between CPU and GPU fields. + +Since initialization and output routines are usually not time-critical, they are implemented +for CPU fields only. In waLBerla we set up the complete simulation using +CPU fields, copy the initialized fields over to the GPU, do the complete computation there, and, in the +end, copy everything back to do the output from the CPU field. +So only the time-critical kernels have to be written in CUDA. + +Thus the setup code of the GPU GameOfLife program is very similar to its CPU version, which was implemented +in a previous tutorial ( \ref tutorial_basics_03 ). +One difference is that fields which are often transferred from/to the GPU should be allocated with +a different field allocator: cuda::HostFieldAllocator . This allocator uses cudaHostAlloc() instead of "new", +such that the memory is marked "pinned", which means that it is always held in RAM and cannot be swapped out to disk. +Data transfer from pinned memory is faster than from normal memory. The usage of this allocator is not +mandatory; the data transfer functions work (slightly slower) also with normally allocated fields. 
+ +\code +ScalarField * createField( IBlock* const block, StructuredBlockStorage* const storage ) +{ + return new ScalarField ( + storage->getNumberOfXCells( *block ), // number of cells in x direction per block + storage->getNumberOfYCells( *block ), // number of cells in y direction per block + storage->getNumberOfZCells( *block ), // number of cells in z direction per block + 1, // one ghost layer + real_t(0), // initial value + field::fzyx, // layout + make_shared<cuda::HostFieldAllocator<double> >() // allocator for host pinned memory + ); +} +\endcode + +Now we initialize the CPU field just like in the previous tutorial \ref tutorial_basics03 . +Then two GPU fields are created: "source" and "destination" field. The helper function +cuda::addGPUFieldToStorage() creates a cuda::GPUField field of the same size and layout of the given +CPU field: +\code +BlockDataID gpuFieldSrcID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Src" ); +BlockDataID gpuFieldDstID = cuda::addGPUFieldToStorage<ScalarField>( blocks, cpuFieldID, "GPU Field Dst" ); +\endcode +The contents of the new GPU fields are initialized with the contents of the given CPU field. + + + +\section cuda01_kernels Writing and calling CUDA kernels + +For a basic understanding of the CUDA support in waLBerla please read \ref cudaPage first. + +After reading this page you should know what a FieldAccessor is and how to call CUDA kernels from +cpp files. So we can now start with writing +a CUDA kernel for the Game of Life algorithm. We place this in a separate file with ".cu" extension. +The build system then automatically detects that this file should be compiled with the CUDA C++ compiler. + +The kernel gets two field accessors as arguments, one for the source and one for the destination field. +Both accessors have to be configured using the CUDA variables blockIdx and threadIdx, such that afterwards +the get() and getNeighbor() functions of the accessor class can work correctly. 
+\code +__global__ void gameOfLifeKernel( cuda::FieldAccessor<double> src, cuda::FieldAccessor<double> dst ) +{ + src.set( blockIdx, threadIdx ); + dst.set( blockIdx, threadIdx ); + int liveNeighbors = 0; + if ( src.getNeighbor( 1, 0,0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( -1, 0,0 ) > 0.5 ) ++liveNeighbors; + // normal Game of Life algorithm .... + // ... +} +\endcode + +To call this kernel we write a thin wrapper sweep which only has to get the GPU fields out of the blockstorage +and passes them to the CUDA kernel. We use the cuda::Kernel class from waLBerla here, so that we can write this +sweep in a normal cpp file. +Here are the contents of this sweep: +\code +auto srcCudaField = block->getData< cuda::GPUField<real_t> > ( gpuFieldSrcID_ ); +auto dstCudaField = block->getData< cuda::GPUField<real_t> > ( gpuFieldDstID_ ); + +auto myKernel = cuda::make_kernel( &gameOfLifeKernel ); +myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *srcCudaField ) ); +myKernel.addFieldIndexingParam( cuda::FieldIndexing<double>::xyz( *dstCudaField ) ); +myKernel(); + +srcCudaField->swapDataPointers( dstCudaField ); +\endcode + +All the computations are done on the GPU. The CPU field is not updated automatically! It was just used for +setup reasons. + +To see if our kernel works, we copy the contents back to the CPU field after every timestep: +\code +timeloop.add() << Sweep( cuda::fieldCpyFunctor<ScalarField, GPUField >(cpuFieldID, gpuFieldDstID) ); +\endcode +Of course this makes no sense for real simulations, since the transfer time is much higher than the +time that was saved by doing the computation on the GPU. For production runs, one would usually transfer the +field back every n'th timestep and write e.g. a VTK frame. + + +\section cuda01_comm Communication + +In waLBerla there are two types of communication: _buffered_ and _direct_ communication. 
+While buffered communication first collects all data in a buffer and sends only one message per communication step and neighbor, +the direct communication strategy, which is based on MPI datatypes, uses no intermediate buffers and therefore has to send +more messages than buffered communication. For details see \ref walberla_communication . + +In the tutorials up to now, only the buffered approach was used. In this tutorial, we switch to the direct communication strategy +because then we can use the CUDA support of the MPI library to directly communicate from/to GPU memory. + +The usage of the two different communication schemes is very similar. Instead of creating a blockforest::communication::UniformBufferedScheme +we create a blockforest::communication::UniformDirectScheme. +Then we register a field::communication::UniformMPIDatatypeInfo instead of the field::communication::PackInfo. + +\code +typedef blockforest::communication::UniformDirectScheme<stencil::D2Q9 > CommScheme; +CommScheme communication( blocks ); +communication.addDataToCommunicate( make_shared<field::communication::UniformMPIDatatypeInfo<GPUField> > (gpuFieldSrcID) ); +\endcode + +This scheme also supports heterogeneous simulations, i.e. using a CPU field on +some processes and a GPU field on other processes. 
+ +*/ + + +} diff --git a/apps/tutorials/cuda/01_GameOfLife_kernels.cu b/apps/tutorials/cuda/01_GameOfLife_kernels.cu new file mode 100644 index 000000000..399f705c8 --- /dev/null +++ b/apps/tutorials/cuda/01_GameOfLife_kernels.cu @@ -0,0 +1,40 @@ +#include "../cuda/01_GameOfLife_kernels.h" + +#include <iostream> + + + +namespace walberla { + + +__global__ void gameOfLifeKernel( cuda::FieldAccessor<double> src, cuda::FieldAccessor<double> dst ) +{ + src.set( blockIdx, threadIdx ); + dst.set( blockIdx, threadIdx ); + + // Count number of living neighbors + int liveNeighbors = 0; + if ( src.getNeighbor( 1, 0,0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( -1, 0,0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( 0,+1,0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( 0,-1,0 ) > 0.5 ) ++liveNeighbors; + + if ( src.getNeighbor( -1, -1, 0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( -1, +1, 0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( +1, -1,0 ) > 0.5 ) ++liveNeighbors; + if ( src.getNeighbor( +1, +1,0 ) > 0.5 ) ++liveNeighbors; + + + // cell dies because of under- or over-population + if ( liveNeighbors < 2 || liveNeighbors > 3 ) + dst.get() = 0.0; + else if ( liveNeighbors == 3 ) // cell comes alive + dst.get() = 1.0; + else + dst.get() = src.get(); +} + + + + +} // namespace walberla diff --git a/apps/tutorials/cuda/01_GameOfLife_kernels.h b/apps/tutorials/cuda/01_GameOfLife_kernels.h new file mode 100644 index 000000000..e0e30c85a --- /dev/null +++ b/apps/tutorials/cuda/01_GameOfLife_kernels.h @@ -0,0 +1,12 @@ +#include <iostream> + +#include "cuda/FieldIndexing.h" + + +namespace walberla { + + +__global__ void gameOfLifeKernel( cuda::FieldAccessor<double> src, cuda::FieldAccessor<double> dst ); + + +} // namespace walberla diff --git a/apps/tutorials/cuda/CMakeLists.txt b/apps/tutorials/cuda/CMakeLists.txt new file mode 100644 index 000000000..efa4d2a55 --- /dev/null +++ b/apps/tutorials/cuda/CMakeLists.txt @@ -0,0 +1,7 @@ 
+waLBerla_link_files_to_builddir( *.prm ) +waLBerla_link_files_to_builddir( *.png ) + +waLBerla_add_executable ( NAME 01_GameOfLife_cuda + FILES 01_GameOfLife_cuda.cpp 01_GameOfLife_kernels.cu + DEPENDS blockforest core cuda field lbm geometry timeloop gui ) + \ No newline at end of file diff --git a/apps/tutorials/cuda/GosperGliderGun.png b/apps/tutorials/cuda/GosperGliderGun.png new file mode 100644 index 0000000000000000000000000000000000000000..f70d7036c6ae62d1f8cc2d1ea198fe90797b8747 GIT binary patch literal 228 zcmeAS@N?(olHy`uVBq!ia0y~yU@&4}U{K*;VPIfL+_`WM0|NtNage(c!@6@aFBupZ zI14-?iy0WWg+Z8+Vb&Z81_lQ95>H=O_Pgw?ywb)$w`^F$z`#)E>EamTas2J%K)wbA z9_Q$H^(OB_>P;Rx2ee&oDUtW~FuQk)QE`d8WPYHTirVbAc8m$n|1=3TK6rfd>RRpG zBWte+s_xYcNtJVm-F5EAqsY{j@=GO;O?(b^H?P-b{q(wjepAkAjg#G~;`tqlZ4yZx fifuK_HyO8olVY2cz08$?fq}u()z4*}Q$iB}=mAe! literal 0 HcmV?d00001 diff --git a/cmake/waLBerlaFunctions.cmake b/cmake/waLBerlaFunctions.cmake index 2bd75e087..cf594b58f 100644 --- a/cmake/waLBerlaFunctions.cmake +++ b/cmake/waLBerlaFunctions.cmake @@ -84,13 +84,17 @@ function ( waLBerla_add_module ) set( hasSourceFiles FALSE ) foreach ( sourceFile ${sourceFiles} ) - if ( ${sourceFile} MATCHES "\\.(c|cpp)" ) + if ( ${sourceFile} MATCHES "\\.(c|cpp|cu)" ) set( hasSourceFiles TRUE ) endif( ) endforeach( ) - if ( hasSourceFiles ) - add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} ) + if ( hasSourceFiles ) + if ( CUDA_FOUND ) + cuda_add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} ) + else() + add_library( ${moduleLibraryName} STATIC ${sourceFiles} ${otherFiles} ) + endif( CUDA_FOUND ) else( ) add_custom_target( ${moduleLibraryName} SOURCES ${sourceFiles} ${otherFiles} ) # dummy IDE target endif( ) @@ -194,7 +198,13 @@ function ( waLBerla_add_executable ) endif ( ) endif() - add_executable( ${ARG_NAME} ${sourceFiles} ) + if ( CUDA_FOUND ) + cuda_add_executable( ${ARG_NAME} ${sourceFiles} ) + else() + add_executable( ${ARG_NAME} ${sourceFiles} ) + endif() + 
+ #add_executable( ${ARG_NAME} ${sourceFiles} ) target_link_modules ( ${ARG_NAME} ${ARG_DEPENDS} ) target_link_libraries( ${ARG_NAME} ${SERVICE_LIBS} ) diff --git a/src/cuda/AddGPUFieldToStorage.h b/src/cuda/AddGPUFieldToStorage.h new file mode 100644 index 000000000..67968b93e --- /dev/null +++ b/src/cuda/AddGPUFieldToStorage.h @@ -0,0 +1,72 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file AddGPUFieldToStorage.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "GPUField.h" + +#include "domain_decomposition/StructuredBlockStorage.h" + +#include <boost/bind.hpp> + + +namespace walberla { +namespace cuda { + + + + //******************************************************************************************************************* + /*! 
Adds a cuda::GPUField to a StructuredBlockStorage + * + * - Similar to walberla::field::addToStorage() functions + * - created field is uninitialized + */ + //******************************************************************************************************************* + template< typename GPUField_T> + BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& bs, + const std::string & identifier, + uint_t fSize, + const Layout layout = fzyx, + uint_t nrOfGhostLayers = 1 ); + + + + //******************************************************************************************************************* + /*! Adds a cuda::GPUField to a StructuredBlockStorage using data from a CPU field + * + * - adds a GPU field to a StructuredBlockStorage using a CPU field + * - sizes, number of ghostlayers and layout are the same as the CPU field + * - GPU field is initialized with the data currently stored in the CPU field + * @tparam Field_T type of the CPU field, the created GPUField will be of type cuda::GPUField<Field_T::value_type> + */ + //******************************************************************************************************************* + template< typename Field_T> + BlockDataID addGPUFieldToStorage( const shared_ptr< StructuredBlockStorage > & bs, + ConstBlockDataID cpuFieldID, + const std::string & identifier ); + + + +} // namespace cuda +} // namespace walberla + + +#include "AddGPUFieldToStorage.impl.h" diff --git a/src/cuda/AddGPUFieldToStorage.impl.h b/src/cuda/AddGPUFieldToStorage.impl.h new file mode 100644 index 000000000..dfde01a84 --- /dev/null +++ b/src/cuda/AddGPUFieldToStorage.impl.h @@ -0,0 +1,91 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. 
waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file AddGPUFieldToStorage.impl.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + + +namespace walberla { +namespace cuda { + + + namespace internal + { + template< typename GPUField_T> + GPUField_T * createGPUField( const IBlock * const block, + const StructuredBlockStorage * const bs, + uint_t ghostLayers, + uint_t fSize, + const field::Layout & layout ) + { + return new GPUField_T( bs->getNumberOfXCells( *block ), + bs->getNumberOfYCells( *block ), + bs->getNumberOfZCells( *block ), + fSize, ghostLayers, layout ); + } + + template< typename Field_T> + GPUField< typename Field_T::value_type> * + createGPUFieldFromCPUField( const IBlock * const block, + const StructuredBlockStorage * const, + ConstBlockDataID cpuFieldID + ) + { + typedef GPUField< typename Field_T::value_type> GPUField_T; + + const Field_T * f = block->getData<Field_T>( cpuFieldID ); + auto gpuField = new GPUField_T( f->xSize(), f->ySize(), f->zSize(), f->fSize(), + f->nrOfGhostLayers(), f->layout() ); + + cuda::fieldCpy( *gpuField, *f ); + + return gpuField; + } + + } + + + template< typename GPUField_T> + BlockDataID addGPUFieldToStorage(const shared_ptr< StructuredBlockStorage >& 
bs, + const std::string & identifier, + uint_t fSize, + const Layout layout, + uint_t nrOfGhostLayers ) + { + auto func = boost::bind ( internal::createGPUField<GPUField_T>, _1, _2, nrOfGhostLayers, fSize, layout ); + return bs->addStructuredBlockData< GPUField_T >( func, identifier ); + } + + + template< typename Field_T> + BlockDataID addGPUFieldToStorage( const shared_ptr< StructuredBlockStorage > & bs, + ConstBlockDataID cpuFieldID, + const std::string & identifier ) + { + auto func = boost::bind ( internal::createGPUFieldFromCPUField<Field_T>, _1, _2, cpuFieldID ); + return bs->addStructuredBlockData< GPUField<typename Field_T::value_type> >( func, identifier ); + } + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/CMakeLists.txt b/src/cuda/CMakeLists.txt new file mode 100644 index 000000000..6959180c2 --- /dev/null +++ b/src/cuda/CMakeLists.txt @@ -0,0 +1,9 @@ +################################################################################################### +# +# Module cuda +# +################################################################################################### + +waLBerla_add_module( DEPENDS core domain_decomposition field stencil BUILD_ONLY_IF_FOUND CUDA ) + +################################################################################################### \ No newline at end of file diff --git a/src/cuda/ErrorChecking.h b/src/cuda/ErrorChecking.h new file mode 100644 index 000000000..49b216744 --- /dev/null +++ b/src/cuda/ErrorChecking.h @@ -0,0 +1,53 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file ErrorChecking.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "core/Abort.h" + +#include <sstream> +#include <cuda_runtime.h> + + +namespace walberla { +namespace cuda { + + +#define WALBERLA_CUDA_CHECK(ans) { ::walberla::cuda::checkForError((ans), __FILE__, __LINE__); } + + + +inline void checkForError( cudaError_t code, const std::string & callerPath, const int line ) +{ + if(code != cudaSuccess) + { + std::stringstream ss; + ss << "CUDA Error: " << cudaGetErrorString( code ); + Abort::instance()->abort( ss.str(), callerPath, line ); + } +} + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldAccessor.h b/src/cuda/FieldAccessor.h new file mode 100644 index 000000000..bf45831bd --- /dev/null +++ b/src/cuda/FieldAccessor.h @@ -0,0 +1,112 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file SimpleFieldAccessor.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include <cuda_runtime.h> +#include "core/DataTypes.h" + + +namespace walberla { +namespace cuda { + + + + template<typename T> + class FieldAccessor + { + public: + enum IndexingScheme { FZYX, FZY, FZ, F, + ZYXF, ZYX, ZY, Z + }; + + FieldAccessor( char * ptr, + uint32_t xOffset, + uint32_t yOffset, + uint32_t zOffset, + uint32_t fOffset, + IndexingScheme indexingScheme ) + : ptr_(ptr), xOffset_(xOffset), yOffset_(yOffset), zOffset_(zOffset), + fOffset_(fOffset), indexingScheme_(indexingScheme ) + {} + + __device__ void set( uint3 blockIdx, uint3 threadIdx ) + { + switch ( indexingScheme_) + { + case FZYX: ptr_ += blockIdx.z * fOffset_ + blockIdx.y * zOffset_ + blockIdx.x * yOffset_ + threadIdx.x * xOffset_; break; + case FZY : ptr_ += blockIdx.y * fOffset_ + blockIdx.x * zOffset_ + threadIdx.x * yOffset_; break; + case FZ : ptr_ += blockIdx.x * fOffset_ + threadIdx.x * zOffset_; break; + case F : ptr_ += threadIdx.x * fOffset_; break; + + case ZYXF: ptr_ += blockIdx.z * zOffset_ + blockIdx.y * yOffset_ + blockIdx.x * xOffset_ + threadIdx.x * fOffset_; break; + case ZYX : ptr_ += blockIdx.y * zOffset_ + blockIdx.x * yOffset_ + threadIdx.x * xOffset_; break; + case ZY : ptr_ += blockIdx.x * zOffset_ + threadIdx.x * yOffset_; break; + case Z : ptr_ += threadIdx.x * zOffset_; break; + } + } + + + __device__ unsigned int getLinearIndex( uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim ) + { + return threadIdx.x + + blockIdx.x * blockDim.x + + blockIdx.y * blockDim.x * gridDim.x + + blockIdx.z * 
blockDim.x * gridDim.x * gridDim.y ; + } + + + __device__ T & get() { return * (T*)(ptr_); } + __device__ T & get( int f) { return * (T*)(ptr_ + f * fOffset_); } + + + __device__ T & getNeighbor( int cx, int cy, int cz ) const + { + return * (T*)( ptr_ + cx * (int)(xOffset_) + + cy * (int)(yOffset_) + + cz * (int)(zOffset_) ); + } + + __device__ T & getNeighbor( int cx, int cy, int cz, int cf ) + { + return * (T*)( ptr_ + cx * (int)(xOffset_) + + cy * (int)(yOffset_) + + cz * (int)(zOffset_) + + cf * (int)(fOffset_) ); + } + + + protected: + char * ptr_; + uint32_t xOffset_; + uint32_t yOffset_; + uint32_t zOffset_; + uint32_t fOffset_; + IndexingScheme indexingScheme_; + }; + + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldAccessorXYZ.h b/src/cuda/FieldAccessorXYZ.h new file mode 100644 index 000000000..4e43dd199 --- /dev/null +++ b/src/cuda/FieldAccessorXYZ.h @@ -0,0 +1,79 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldAccessorXYZ.h +//! \ingroup cuda +//! 
\author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + + +#include <cuda_runtime.h> + +#include "core/DataTypes.h" + +namespace walberla { +namespace cuda { + + + template<typename T> + class FieldAccessorXYZ + { + public: + FieldAccessorXYZ( char * ptr, size_t xOffset, size_t yOffset, size_t zOffset, size_t fOffset ) + : ptr_(ptr), xOffset_(xOffset), yOffset_(yOffset), zOffset_(zOffset), fOffset_(fOffset) + {} + + __device__ void set( uint3 blockIdx, uint3 threadIdx ) + { + ptr_ += threadIdx.x * xOffset_ + + blockIdx.x * yOffset_ + + blockIdx.y * zOffset_ ; + } + + __device__ T & get() { return * (T*)(ptr_); } + __device__ T & get( int f) { return * (T*)(ptr_ + f * fOffset_); } + + + __device__ T & getNeighbor( int cx, int cy, int cz ) const + { + return * (T*)( ptr_ + cx * (int)(xOffset_) + + cy * (int)(yOffset_) + + cz * (int)(zOffset_) ); + } + + __device__ T & getNeighbor( int cx, int cy, int cz, int cf ) + { + return * (T*)( ptr_ + cx * (int)(xOffset_) + + cy * (int)(yOffset_) + + cz * (int)(zOffset_) + + cf * (int)(fOffset_) ); + } + + protected: + char * ptr_; + size_t xOffset_; + size_t yOffset_; + size_t zOffset_; + size_t fOffset_; + }; + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldCopy.h b/src/cuda/FieldCopy.h new file mode 100644 index 000000000..e51f26252 --- /dev/null +++ b/src/cuda/FieldCopy.h @@ -0,0 +1,209 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldCopy.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "ErrorChecking.h" +#include "GPUField.h" + +#include "domain_decomposition/StructuredBlockStorage.h" +#include "field/Field.h" +#include "field/GhostLayerField.h" + +#include "core/Abort.h" +#include "core/logging/Logging.h" + +#include <cuda_runtime.h> + +namespace walberla { +namespace cuda { + + + template<typename DstType, typename SrcType> + void fieldCpy( const shared_ptr< StructuredBlockStorage > & blocks, BlockDataID dstID, ConstBlockDataID srcID ) + { + for ( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + { + DstType * dst = blockIt->getData<DstType>( dstID ); + const SrcType * src = blockIt->getData<SrcType>( srcID ); + fieldCpy( *dst, *src ); + } + } + + template<typename DstType, typename SrcType> + boost::function<void()> fieldCpyFunctor( const shared_ptr< StructuredBlockStorage > & blocks, + BlockDataID dstID, ConstBlockDataID srcID ) + { + return boost::bind( fieldCpy<DstType,SrcType>, blocks, dstID, srcID ); + } + + + + template<typename DstType, typename SrcType> + void fieldCpySweepFunction( BlockDataID dstID, ConstBlockDataID srcID, IBlock * block ) + { + DstType * dst = block->getData<DstType>( dstID ); + const SrcType * src = block->getData<SrcType>( srcID ); + fieldCpy( *dst, *src ); + } + + template<typename DstType, typename SrcType> + boost::function<void(IBlock*)> 
fieldCpyFunctor( BlockDataID dstID, ConstBlockDataID srcID ) + { + return boost::bind( fieldCpySweepFunction<DstType,SrcType>, dstID, srcID, _1 ); + } + + + + + + template<typename T, uint_t fs> + void fieldCpy( cuda::GPUField<T> & dst, const field::Field<T,fs> & src ); + + + + template<typename T, uint_t fs> + void fieldCpy( field::Field<T,fs> & dst, const cuda::GPUField<T> & src ); + + + + + //=================================================================================================================== + // + // Implementation + // + //=================================================================================================================== + + + + + template<typename T, uint_t fs> + void fieldCpy( cuda::GPUField<T> & dst, const field::Field<T,fs> & src ) + { + cudaMemcpy3DParms p; + memset( &p, 0, sizeof(p) ); + + + if ( dst.layout() != src.layout() ) { + WALBERLA_ABORT( "Cannot copy fields with different layout" ); + } + + bool canCopy = ( src.layout() == fzyx && + dst.fAllocSize() == src.fAllocSize() && + dst.zAllocSize() == src.zAllocSize() && + dst.yAllocSize() == src.yAllocSize() ) + || + ( src.layout() == zyxf && + dst.zAllocSize() == src.zAllocSize() && + dst.yAllocSize() == src.yAllocSize() && + dst.xAllocSize() == src.xAllocSize() ); + + if ( !canCopy ) { + WALBERLA_ABORT("Field have to have the same size "); + } + + if ( dst.layout() == fzyx ) + { + p.srcPtr = make_cudaPitchedPtr( (void*)(src.data()), // pointer + sizeof(T) * src.xAllocSize(), // pitch + src.xAllocSize(), // inner dimension size + src.yAllocSize() ); // next outer dimension size + + p.extent.width = src.xAllocSize() * sizeof(T); + p.extent.height = src.yAllocSize(); + p.extent.depth = src.zAllocSize() * src.fAllocSize(); + } + else + { + p.srcPtr = make_cudaPitchedPtr( (void*)(src.data()), // pointer + sizeof(T) * src.fAllocSize(), // pitch + src.fAllocSize(), // inner dimension size + src.xAllocSize() ); // next outer dimension size + + p.extent.width = src.fAllocSize() 
* sizeof(T); + p.extent.height = src.xAllocSize(); + p.extent.depth = src.yAllocSize() * src.zAllocSize(); + } + + p.dstPtr = dst.pitchedPtr(); + p.kind = cudaMemcpyHostToDevice; + WALBERLA_CUDA_CHECK( cudaMemcpy3D( &p ) ); + } + + + + template<typename T, uint_t fs> + void fieldCpy( field::Field<T,fs> & dst, const cuda::GPUField<T> & src ) + { + cudaMemcpy3DParms p; + memset( &p, 0, sizeof(p) ); + + if ( dst.layout() != src.layout() ) { + WALBERLA_ABORT( "Cannot copy fields with different layout" ); + } + + bool canCopy = ( src.layout() == fzyx && + dst.fAllocSize() == src.fAllocSize() && + dst.zAllocSize() == src.zAllocSize() && + dst.yAllocSize() == src.yAllocSize() ) + || + ( src.layout() == zyxf && + dst.zAllocSize() == src.zAllocSize() && + dst.yAllocSize() == src.yAllocSize() && + dst.xAllocSize() == src.xAllocSize() ); + + if ( !canCopy ) { + WALBERLA_ABORT("Field have to have the same size "); + } + + if ( dst.layout() == fzyx ) + { + p.dstPtr = make_cudaPitchedPtr( (void*)(dst.data()), // pointer + sizeof(T) * dst.xAllocSize(), // pitch + dst.xAllocSize(), // inner dimension size + dst.yAllocSize() ); // next outer dimension size + + p.extent.width = dst.xAllocSize() * sizeof(T); + p.extent.height = dst.yAllocSize(); + p.extent.depth = dst.zAllocSize() * dst.fAllocSize(); + } + else + { + p.dstPtr = make_cudaPitchedPtr( (void*)(dst.data()), // pointer + sizeof(T) * dst.fAllocSize(), // pitch + dst.fAllocSize(), // inner dimension size + dst.xAllocSize() ); // next outer dimension size + + p.extent.width = dst.fAllocSize() * sizeof(T); + p.extent.height = dst.xAllocSize(); + p.extent.depth = dst.yAllocSize() * dst.zAllocSize(); + } + + p.srcPtr = src.pitchedPtr(); + p.kind = cudaMemcpyDeviceToHost; + WALBERLA_CUDA_CHECK( cudaMemcpy3D( &p ) ); + + } + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldIndexing.cpp b/src/cuda/FieldIndexing.cpp new file mode 100644 index 000000000..a61e50a0d --- /dev/null +++ 
b/src/cuda/FieldIndexing.cpp @@ -0,0 +1,225 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldIndexing.cpp +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "FieldIndexing.h" +#include "GPUTypesExplicitInstantiation.h" +#include "GPUField.h" + +#include "core/cell/CellInterval.h" +#include "core/debug/Debug.h" +#include "core/logging/Logging.h" +#include "field/Layout.h" + +#include <cuda_runtime.h> + +#include <limits> +#include <cmath> + +namespace walberla { +namespace cuda { + +template< typename T> +FieldIndexing<T>::FieldIndexing ( const GPUField<T> & field, + uint3 _blockDim, uint3 _gridDim, + const FieldAccessor<T> _gpuAccess ) + : field_( field ), + blockDim_( _blockDim ), + gridDim_( _gridDim ), + gpuAccess_( _gpuAccess ) +{ + WALBERLA_DEBUG_SECTION() + { + cudaDeviceProp prop; + int count = 0; // stays 0 (loop below is skipped) if the device query fails + cudaGetDeviceCount(&count); + int threadsPerBlock = std::numeric_limits<int>::max(); + for (int i = 0; i < count; i++) { + cudaGetDeviceProperties(&prop, i); + threadsPerBlock = std::min( prop.maxThreadsPerBlock, threadsPerBlock );
+ } + WALBERLA_ASSERT_LESS_EQUAL( int_c( blockDim_.x ), threadsPerBlock, + "InnerCoordThreadIndexing works only for fields where each dimension x,y,z is smaller " << + "than the maximal thread count per CUDA block." ); // a block of exactly maxThreadsPerBlock threads is still a legal launch + } +} + + +template< typename T> +void shiftCoordinatesWhileFastestCoordHasSizeOne( typename FieldAccessor<T>::IndexingScheme & indexing, dim3 & gridDim, dim3 & blockDim ) +{ + bool runLoop = true; + while( blockDim.x == 1 && runLoop ) + { + blockDim.x = gridDim.x; + gridDim.x = gridDim.y; + gridDim.y = gridDim.z; + gridDim.z = 1; + + + switch ( indexing ) { + case FieldAccessor<T>::FZYX: indexing = FieldAccessor<T>::FZY; break; + case FieldAccessor<T>::FZY : indexing = FieldAccessor<T>::FZ ; break; + case FieldAccessor<T>::FZ : indexing = FieldAccessor<T>::F ; break; + + case FieldAccessor<T>::ZYXF: indexing = FieldAccessor<T>::ZYX ; break; + case FieldAccessor<T>::ZYX : indexing = FieldAccessor<T>::ZY ; break; + case FieldAccessor<T>::ZY : indexing = FieldAccessor<T>::Z ; break; + + // iteration goes only over a single element - stop the loop + case FieldAccessor<T>::Z: runLoop = false; break; + case FieldAccessor<T>::F: runLoop = false; break; + } + } +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::interval ( const GPUField<T> & f, const CellInterval & ci, int fBegin, int fEnd ) +{ + unsigned int xOffset, yOffset, zOffset, fOffset; + + if ( f.layout() == field::zyxf ) + { + fOffset = sizeof(T); + xOffset = uint32_c( f.pitchedPtr().pitch ); + yOffset = xOffset * uint32_c( f.xAllocSize() ); + zOffset = yOffset * uint32_c( f.yAllocSize() ); + } + else + { + xOffset = sizeof(T); + yOffset = uint32_c( f.pitchedPtr().pitch ); + zOffset = yOffset * uint32_c( f.yAllocSize() ); + fOffset = zOffset * uint32_c( f.zAllocSize() ); + } + char * data = (char*)f.pitchedPtr().ptr; + + // Jump over ghost cells to first inner cell + cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() ); + data += ( ci.xMin() + gl )* int_c(xOffset) + + ( ci.yMin() + gl )* 
int_c(yOffset) + + ( ci.zMin() + gl )* int_c(zOffset) + + fBegin * int_c(fOffset); // also skip to the first requested f component; the launch below only spans (fEnd - fBegin) + + dim3 gridDim; + dim3 blockDim; + typename FieldAccessor<T>::IndexingScheme firstCoord; + if ( f.layout() == fzyx ) + { + firstCoord = FieldAccessor<T>::FZYX; + blockDim = dim3 ( (unsigned int)ci.xSize(), 1, 1 ); + gridDim = dim3 ( (unsigned int)ci.ySize(), (unsigned int)ci.zSize(), (unsigned int)( fEnd - fBegin) ); + } + else + { + firstCoord = FieldAccessor<T>::ZYXF; + blockDim = dim3 ( (unsigned int)(fEnd - fBegin), 1, 1 ); + gridDim = dim3 ( (unsigned int)ci.xSize(), (unsigned int)ci.ySize(), (unsigned int)ci.zSize() ); + } + + shiftCoordinatesWhileFastestCoordHasSizeOne<T>( firstCoord, gridDim, blockDim ); + + return FieldIndexing<T> ( f, blockDim, gridDim, + FieldAccessor<T>( data, xOffset, yOffset, zOffset, fOffset, firstCoord ) ); +} + + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::xyz ( const GPUField<T> & f ) +{ + CellInterval ci ( 0,0,0, + cell_idx_c( f.xSize() ) - 1, + cell_idx_c( f.ySize() ) - 1, + cell_idx_c( f.zSize() ) - 1 ); + + return interval( f, ci, 0,1 ); +} + + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::withGhostLayerXYZ( const GPUField<T> & f, uint_t numGhostLayers ) +{ + cell_idx_t gl = std::min( cell_idx_c(numGhostLayers), cell_idx_c( f.nrOfGhostLayers() ) ); + CellInterval ci ( -gl,-gl,-gl, + cell_idx_c( f.xSize() ) + gl - 1, + cell_idx_c( f.ySize() ) + gl - 1, + cell_idx_c( f.zSize() ) + gl - 1 ); + + return interval( f, ci, 0, 1 ); +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::ghostLayerOnlyXYZ( const GPUField<T> & f, uint_t thickness, + stencil::Direction dir, bool fullSlice ) +{ + CellInterval ci; + f.getGhostRegion( dir, ci, cell_idx_c(thickness), fullSlice ); + return interval( f, ci, 0, 1 ); +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::sliceBeforeGhostLayerXYZ( const GPUField<T> & f, uint_t thickness, + stencil::Direction dir, bool fullSlice ) +{ + CellInterval ci; + f.getSliceBeforeGhostLayer( dir, ci, 
cell_idx_c(thickness), fullSlice ); + return interval( f, ci, 0, 1 ); +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::allInner ( const GPUField<T> & f ) +{ + CellInterval ci ( 0,0,0, + cell_idx_c( f.xSize() ) - 1, + cell_idx_c( f.ySize() ) - 1, + cell_idx_c( f.zSize() ) - 1 ); + + return interval( f, ci, 0, cell_idx_c( f.fSize() ) ); +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::allWithGhostLayer ( const GPUField<T> & f ) +{ + cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() ); + CellInterval ci ( -gl,-gl,-gl, + cell_idx_c( f.xSize() ) + gl - 1, + cell_idx_c( f.ySize() ) + gl - 1, + cell_idx_c( f.zSize() ) + gl - 1 ); + + return interval( f, ci, 0, cell_idx_c( f.fSize() ) ); + +} + +template< typename T> +FieldIndexing<T> FieldIndexing<T>::all ( const GPUField<T> & f, const cell::CellInterval & ci ) +{ + return interval( f, ci, 0, cell_idx_c( f.fSize() ) ); +} + + + + +GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexing ) + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldIndexing.h b/src/cuda/FieldIndexing.h new file mode 100644 index 000000000..437b03453 --- /dev/null +++ b/src/cuda/FieldIndexing.h @@ -0,0 +1,92 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). 
If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldIndexing.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +//! \brief Indexing scheme that executes all elements of the inner coordinate within one thread block +// +//====================================================================================================================== + +#pragma once + +#include "FieldAccessor.h" + +#include "stencil/Directions.h" +#include <cuda_runtime.h> + +namespace walberla { namespace cell { class CellInterval; } } + +namespace walberla { +namespace cuda { + + // Forward Declarations + template< typename T> class GPUField; + + template<typename T> + class FieldIndexing + { + public: + + //** Kernel call ****************************************************************************************** + /*! \name Kernel call */ + //@{ + uint3 blockDim() const { return blockDim_; } + uint3 gridDim () const { return gridDim_; } + + const FieldAccessor<T> & gpuAccess() const { return gpuAccess_; } + //@} + //**************************************************************************************************************** + + + + + //** Creation ********************************************************************************************* + /*! 
\name Creation */ + //@{ + static FieldIndexing<T> interval ( const GPUField<T> & f, + const cell::CellInterval & ci, + int fBegin=0, int fEnd=1 ); + + + static FieldIndexing<T> xyz ( const GPUField<T> & f ); + static FieldIndexing<T> withGhostLayerXYZ ( const GPUField<T> & f, uint_t numGhostLayers ); + static FieldIndexing<T> ghostLayerOnlyXYZ ( const GPUField<T> & f, uint_t thickness, + stencil::Direction dir, bool fullSlice = false ); + static FieldIndexing<T> sliceBeforeGhostLayerXYZ( const GPUField<T> & f, uint_t thickness, + stencil::Direction dir, bool fullSlice = false ); + + static FieldIndexing<T> allInner ( const GPUField<T> & f ); + static FieldIndexing<T> allWithGhostLayer ( const GPUField<T> & f ); + static FieldIndexing<T> all ( const GPUField<T> & f, const cell::CellInterval & ci ); + //@} + //**************************************************************************************************************** + + protected: + FieldIndexing ( const GPUField<T> & field, + uint3 _blockDim, uint3 _gridDim, + const FieldAccessor<T> _gpuAccess ); + + const GPUField<T> & field_; + uint3 blockDim_; + uint3 gridDim_; + FieldAccessor<T> gpuAccess_; + }; + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldIndexingXYZ.cpp b/src/cuda/FieldIndexingXYZ.cpp new file mode 100644 index 000000000..e1bb2cbb2 --- /dev/null +++ b/src/cuda/FieldIndexingXYZ.cpp @@ -0,0 +1,123 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldIndexingXYZ.cpp +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "FieldIndexingXYZ.h" +#include "GPUTypesExplicitInstantiation.h" +#include "GPUField.h" + +#include "core/cell/CellInterval.h" +#include "core/debug/Debug.h" + +namespace walberla { +namespace cuda { + + +template< typename T> +FieldIndexingXYZ<T>::FieldIndexingXYZ ( const GPUField<T> & field, + uint3 _blockDim, uint3 _gridDim, + const FieldAccessorXYZ<T> _gpuAccess ) + : field_( field ), + blockDim_( _blockDim ), + gridDim_( _gridDim ), + gpuAccess_( _gpuAccess ) +{ + WALBERLA_DEBUG_SECTION() + { + cudaDeviceProp prop; + int count = 0; // stays 0 (loop below is skipped) if the device query fails + cudaGetDeviceCount(&count); + int threadsPerBlock = std::numeric_limits<int>::max(); + for (int i = 0; i < count; i++) { + cudaGetDeviceProperties(&prop, i); + threadsPerBlock = std::min( prop.maxThreadsPerBlock, threadsPerBlock ); + } + WALBERLA_ASSERT_LESS_EQUAL( int_c( blockDim_.x ), threadsPerBlock, // a block of exactly maxThreadsPerBlock threads is still a legal launch + "InnerCoordThreadIndexing works only for fields where each dimension x,y,z is smaller " << + "than the maximal thread count per CUDA block." 
); + } +} + +template< typename T> +FieldIndexingXYZ<T> FieldIndexingXYZ<T>::interval ( const GPUField<T> & f, const CellInterval & ci ) +{ + size_t xOffset, yOffset, zOffset, fOffset; + + if ( f.layout() == field::zyxf ) + { + fOffset = sizeof(T); + xOffset = f.pitchedPtr().pitch; + yOffset = xOffset * f.xAllocSize(); // strides follow the allocated extent, which includes ghost layers + zOffset = yOffset * f.yAllocSize(); + } + else + { + xOffset = sizeof(T); + yOffset = f.pitchedPtr().pitch; + zOffset = yOffset * f.yAllocSize(); // strides follow the allocated extent, which includes ghost layers + fOffset = zOffset * f.zAllocSize(); + } + char * data = (char*)f.pitchedPtr().ptr; + + // Jump over ghost cells to first inner cell + cell_idx_t gl = cell_idx_c( f.nrOfGhostLayers() ); + data += ( ci.xMin() + gl )* int_c(xOffset) + + ( ci.yMin() + gl )* int_c(yOffset) + + ( ci.zMin() + gl )* int_c(zOffset); + + dim3 blockDim( (unsigned int)ci.xSize(), 1, 1 ); // threads of a block run along x (FieldAccessorXYZ::set uses threadIdx.x for x) + dim3 gridDim ( (unsigned int)ci.ySize(), (unsigned int)ci.zSize(), 1 ); // blocks enumerate y (blockIdx.x) and z (blockIdx.y) + return FieldIndexingXYZ<T> ( f, blockDim, gridDim, + FieldAccessorXYZ<T>( data, xOffset, yOffset, zOffset, fOffset ) ); +} + + +template< typename T> +FieldIndexingXYZ<T> FieldIndexingXYZ<T>::xyz ( const GPUField<T> & f ) +{ + CellInterval ci ( 0,0,0, + cell_idx_c( f.xSize() ) - 1, + cell_idx_c( f.ySize() ) - 1, + cell_idx_c( f.zSize() ) - 1 ); + + return interval( f, ci ); +} + + +template< typename T> +FieldIndexingXYZ<T> FieldIndexingXYZ<T>::withGhostLayerXYZ( const GPUField<T> & f, uint_t numGhostLayers ) +{ + cell_idx_t gl = std::min( cell_idx_c(numGhostLayers), cell_idx_c( f.nrOfGhostLayers() ) ); + CellInterval ci ( -gl,-gl,-gl, + cell_idx_c( f.xSize() ) + gl - 1, + cell_idx_c( f.ySize() ) + gl - 1, + cell_idx_c( f.zSize() ) + gl - 1 ); + + return interval( f, ci ); +} + + +GPU_CLASS_TEMPLATE_INSTANTIATION( FieldIndexingXYZ ) + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/FieldIndexingXYZ.h b/src/cuda/FieldIndexingXYZ.h new file mode 100644 index 000000000..19d3f98c4 --- /dev/null +++ b/src/cuda/FieldIndexingXYZ.h @@ -0,0 +1,79 @@
+//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldIndexingXYZ.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "FieldAccessorXYZ.h" +#include <cuda_runtime.h> + + +namespace walberla { namespace cell { class CellInterval; } } + + +namespace walberla { +namespace cuda { + +// Forward Declarations +template< typename T> class GPUField; + + + template<typename T> + class FieldIndexingXYZ + { + public: + + //** Kernel call ****************************************************************************************** + /*! \name Kernel call */ + //@{ + uint3 blockDim() const { return blockDim_; } + uint3 gridDim () const { return gridDim_; } + + const FieldAccessorXYZ<T> & gpuAccess() const { return gpuAccess_; } + //@} + //**************************************************************************************************************** + + + //** Creation ********************************************************************************************* + /*! 
\name Creation */ + //@{ + + static FieldIndexingXYZ<T> interval ( const GPUField<T> & f, const cell::CellInterval & ci ); + + + static FieldIndexingXYZ<T> xyz ( const GPUField<T> & f ); + static FieldIndexingXYZ<T> withGhostLayerXYZ ( const GPUField<T> & f, uint_t numGhostLayers ); + //@} + //**************************************************************************************************************** + + protected: + FieldIndexingXYZ<T> ( const GPUField<T> & field, uint3 _blockDim, uint3 _gridDim, const FieldAccessorXYZ<T> _gpuAccess ); + + const GPUField<T> & field_; + uint3 blockDim_; + uint3 gridDim_; + FieldAccessorXYZ<T> gpuAccess_; + }; + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/GPUField.cpp b/src/cuda/GPUField.cpp new file mode 100644 index 000000000..326e5c0b7 --- /dev/null +++ b/src/cuda/GPUField.cpp @@ -0,0 +1,187 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file GPUField.cpp +//! \ingroup cuda +//! 
\author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "GPUField.h" +#include "ErrorChecking.h" +#include "GPUTypesExplicitInstantiation.h" + +#include "core/logging/Logging.h" + +namespace walberla { +namespace cuda { + + +template<typename T> +GPUField<T>::GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSize, + uint_t _nrOfGhostLayers, const Layout & _layout ) + : nrOfGhostLayers_( _nrOfGhostLayers ), + xSize_( _xSize), ySize_( _ySize ), zSize_( _zSize ), fSize_( _fSize ), + layout_( _layout ) +{ + cudaExtent extent; + if ( layout_ == zyxf ) + { + extent.width = _fSize * sizeof(T); + extent.height = (_xSize + 2 * _nrOfGhostLayers ); + extent.depth = (_ySize + 2 * _nrOfGhostLayers ) * ( _zSize + 2 * _nrOfGhostLayers ); + } + else + { + extent.width = (_xSize + 2 * _nrOfGhostLayers ) * sizeof(T); + extent.height = (_ySize + 2 * _nrOfGhostLayers ); + extent.depth = (_zSize + 2 * _nrOfGhostLayers ) * _fSize; + } + + WALBERLA_CUDA_CHECK ( cudaMalloc3D ( &pitchedPtr_, extent ) ); +} + + +template<typename T> +GPUField<T>::~GPUField() +{ + cudaFree( pitchedPtr_.ptr ); +} + + +template<typename T> +void GPUField<T>::getGhostRegion(stencil::Direction d, CellInterval & ci, + cell_idx_t thickness, bool fullSlice ) const +{ + const cell_idx_t sizeArr [] = { cell_idx_c( xSize() ), + cell_idx_c( ySize() ), + cell_idx_c( zSize() )}; + + WALBERLA_ASSERT_GREATER( thickness, 0 ); + WALBERLA_ASSERT_LESS_EQUAL( uint_c(thickness), nrOfGhostLayers() ); + const cell_idx_t ghosts = cell_idx_c ( thickness ); + + cell_idx_t fullSliceInc = fullSlice ? 
cell_idx_c( nrOfGhostLayers() ) : 0; + + for(unsigned int dim = 0; dim< 3; ++dim) + switch ( stencil::c[dim][d] ) + { + case -1: ci.min()[dim] = -ghosts; ci.max()[dim] = 0 - 1; break; + case 0: ci.min()[dim] = -fullSliceInc; ci.max()[dim] = sizeArr[dim]+fullSliceInc - 1; break; + case 1: ci.min()[dim] = sizeArr[dim]; ci.max()[dim] = sizeArr[dim]+ghosts - 1; break; + } +} + + +template<typename T> +void GPUField<T>::getSliceBeforeGhostLayer(stencil::Direction d, CellInterval & ci, + cell_idx_t thickness, bool fullSlice ) const +{ + WALBERLA_ASSERT_GREATER( thickness, 0 ); + + const cell_idx_t sizeArr [] = { cell_idx_c( xSize() ), + cell_idx_c( ySize() ), + cell_idx_c( zSize() )}; + + cell_idx_t fullSliceInc = fullSlice ? cell_idx_c( nrOfGhostLayers() ) : 0; + for(unsigned int dim = 0; dim< 3; ++dim) + switch ( stencil::c[dim][d] ) + { + case -1: ci.min()[dim] = 0; ci.max()[dim] = thickness - 1; break; + case 0: ci.min()[dim] = -fullSliceInc; ci.max()[dim] = sizeArr[dim] +fullSliceInc- 1; break; + case 1: ci.min()[dim] = sizeArr[dim]-thickness; ci.max()[dim] = sizeArr[dim] - 1; break; + } +} + +//******************************************************************************************************************* +/*! Implementation of equality operator, GPUField are equal if identical +* +* operator== is required in order to store GPUFields as block data. +* Implementing a correct equality check would require to call a kernel, +* which should be done manually. +* Therefore only identical GPUFields are considered equal. 
+*/ +//******************************************************************************************************************* +template<typename T> +bool GPUField<T>::operator==( const GPUField & o ) const +{ + return pitchedPtr_.ptr == o.pitchedPtr_.ptr && + nrOfGhostLayers_ == o.nrOfGhostLayers_ && + xSize_ == o.xSize_ && + ySize_ == o.ySize_ && + zSize_ == o.zSize_ && + fSize_ == o.fSize_ && + layout_ == o.layout_ ; +} + + + +template<typename T> +uint_t GPUField<T>::xAllocSize() const +{ + if ( layout_ == field::fzyx ) + { + // allocation size is stored in pitched pointer + // pitched pointer stores the amount of padded region in bytes + // but we have to return the size in #elements + WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 ); + return pitchedPtr_.pitch / sizeof(T); + } + return xSize_ + 2 * nrOfGhostLayers_; +} + +template<typename T> +uint_t GPUField<T>::yAllocSize() const +{ + return ySize_ + 2 * nrOfGhostLayers_; +} + +template<typename T> +uint_t GPUField<T>::zAllocSize() const +{ + return zSize_ + 2 * nrOfGhostLayers_; +} + +template<typename T> +uint_t GPUField<T>::fAllocSize() const +{ + if ( layout_ == field::zyxf ) + { + WALBERLA_ASSERT_EQUAL( pitchedPtr_.pitch % sizeof(T), 0 ); + return pitchedPtr_.pitch / sizeof(T); + } + return fSize_; +} + +template<typename T> +void GPUField<T>::swapDataPointers( GPUField<T> & other ) +{ + WALBERLA_ASSERT_EQUAL( xAllocSize(), other.xAllocSize() ); + WALBERLA_ASSERT_EQUAL( yAllocSize(), other.yAllocSize() ); + WALBERLA_ASSERT_EQUAL( zAllocSize(), other.zAllocSize() ); + WALBERLA_ASSERT_EQUAL( fAllocSize(), other.fAllocSize() ); + WALBERLA_ASSERT_EQUAL( layout(), other.layout() ); + std::swap( pitchedPtr_, other.pitchedPtr_ ); +} + + + +GPU_CLASS_TEMPLATE_INSTANTIATION( GPUField ) + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/GPUField.h b/src/cuda/GPUField.h new file mode 100755 index 000000000..52cc002b3 --- /dev/null +++ b/src/cuda/GPUField.h @@ -0,0 +1,122 @@ 
+//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file GPUField.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "core/DataTypes.h" +#include "core/cell/CellInterval.h" +#include "field/Layout.h" +#include "stencil/Directions.h" + +#include <cuda_runtime.h> + + + +namespace walberla { +namespace cuda { + + using field::Layout; + using field::fzyx; + using field::zyxf; + + + //******************************************************************************************************************* + /*! GhostLayerField stored on a CUDA GPU + * + * Basically a wrapper around a CUDA device pointer together with size information about the field + * i.e. sizes in x,y,z,f directions and number of ghost layers. + * + * Internally represented by a cudaPitchedPtr which is allocated with cudaMalloc3D to take padding of the + * innermost coordinate into account.
+ * + * Supports Array-of-Structures (AoS,zyxf) layout and Structure-of-Arrays (SoA, fzyx) layout, in a similar way + * to field::Field + * + * To work with the GPUField look at the cuda::fieldCpy functions to transfer a field::Field to a cuda::GPUField + * and vice versa. + * When writing CUDA kernels for GPUFields have a look at the FieldIndexing and FieldAccessor concepts. + * These simplify the "iteration" i.e. indexing of cells in GPUFields. + */ + //******************************************************************************************************************* + template<typename T> + class GPUField + { + public: + typedef T value_type; + + GPUField( uint_t _xSize, uint_t _ySize, uint_t _zSize, uint_t _fSize, + uint_t _nrOfGhostLayers, const Layout & _layout = zyxf ); + + ~GPUField(); // NOTE(review): no copy constructor or copy assignment is declared; if the destructor releases the device memory, copying a GPUField would free it twice - confirm + + Layout layout() const { return layout_; } + + cudaPitchedPtr pitchedPtr() const { return pitchedPtr_; } + + + inline uint_t xSize() const { return xSize_; } + inline uint_t ySize() const { return ySize_; } + inline uint_t zSize() const { return zSize_; } + inline uint_t fSize() const { return fSize_; } + + cell_idx_t xOff() const { return cell_idx_c( nrOfGhostLayers_ ); } + cell_idx_t yOff() const { return cell_idx_c( nrOfGhostLayers_ ); } + cell_idx_t zOff() const { return cell_idx_c( nrOfGhostLayers_ ); } + + + uint_t xAllocSize() const; + uint_t yAllocSize() const; + uint_t zAllocSize() const; + uint_t fAllocSize() const; + + + void swapDataPointers( GPUField<T> & other ); + void swapDataPointers( GPUField<T> * other ) { swapDataPointers( *other); } + + + inline uint_t nrOfGhostLayers() const { return nrOfGhostLayers_; } + + bool operator==( const GPUField & other ) const; + + void getGhostRegion( stencil::Direction d, CellInterval & ci, + cell_idx_t thickness, bool fullSlice ) const; + void 
getSliceBeforeGhostLayer(stencil::Direction d, CellInterval & ci, + cell_idx_t thickness, bool fullSlice ) const; + + + void * data() { return pitchedPtr_.ptr; } + const void * 
data() const { return pitchedPtr_.ptr; } + + protected: + cudaPitchedPtr pitchedPtr_; + uint_t nrOfGhostLayers_; + uint_t xSize_; + uint_t ySize_; + uint_t zSize_; + uint_t fSize_; + Layout layout_; + }; + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/GPUTypesExplicitInstantiation.h b/src/cuda/GPUTypesExplicitInstantiation.h new file mode 100644 index 000000000..bdc4b5846 --- /dev/null +++ b/src/cuda/GPUTypesExplicitInstantiation.h @@ -0,0 +1,8 @@ +#define GPU_CLASS_TEMPLATE_INSTANTIATION(ClassName)\ + template class ClassName< double >;\ + template class ClassName< float >;\ + template class ClassName< int >;\ + template class ClassName< uint8_t >;\ + template class ClassName< uint16_t >; + + diff --git a/src/cuda/HostFieldAllocator.h b/src/cuda/HostFieldAllocator.h new file mode 100644 index 000000000..0c08bfadb --- /dev/null +++ b/src/cuda/HostFieldAllocator.h @@ -0,0 +1,81 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file HostFieldAllocator.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +//! \brief Allocator that allocates a CPU!
field using cudaHostAlloc +// +//====================================================================================================================== + +#pragma once + +#include "ErrorChecking.h" +#include "field/allocation/FieldAllocator.h" + +#include <cuda_runtime.h> + + +namespace walberla { +namespace cuda { + + + //******************************************************************************************************************* + /*! + * Allocator that allocates a CPU! field using cudaHostAlloc without padding + * + * Uses cudaHostAlloc for the allocation - which allocates page-locked memory that is faster to transfer to the GPU + * This allocator should be used for CPU fields that are often transferred to GPU and back + * + * \ingroup cuda + * + */ + //******************************************************************************************************************* + template<typename T, unsigned int cudaHostAllocFlags = cudaHostAllocDefault> + class HostFieldAllocator : public field::FieldAllocator<T> + { + public: + virtual ~HostFieldAllocator() {} + + virtual T * allocateMemory ( uint_t size0, uint_t size1, uint_t size2, uint_t size3, + uint_t & allocSize1, uint_t & allocSize2, uint_t & allocSize3 ) + { + allocSize1=size1; // no padding: the allocated sizes equal the requested sizes + allocSize2=size2; + allocSize3=size3; + void * result; + WALBERLA_CUDA_CHECK( cudaHostAlloc( &result, size0*size1*size2*size3*sizeof(T), cudaHostAllocFlags ) ); + return static_cast<T*>(result); + } + + virtual T * allocateMemory ( uint_t size ) + { + T* result; + WALBERLA_CUDA_CHECK( cudaHostAlloc( &result, size*sizeof(T), cudaHostAllocFlags ) ); // report allocation failures like the overload above does + return result; + } + + virtual void deallocate(T *& values) { + WALBERLA_CUDA_CHECK( cudaFreeHost( values ) ); + } + }; + + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/Kernel.h b/src/cuda/Kernel.h new file mode 100644 index 
000000000..3b6acf0a1 --- /dev/null +++ b/src/cuda/Kernel.h @@ -0,0 +1,294 @@
+//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file Kernel.h +//! \ingroup cuda +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + +#include "core/Abort.h" +#include "core/debug/Debug.h" + +#include "ErrorChecking.h" + +#include <cuda_runtime.h> +#include <boost/type_traits.hpp> +#include <cstring> +#include <vector> + + +namespace walberla { +namespace cuda { + + + //******************************************************************************************************************* + /*! 
Wrapper class around a CUDA kernel, to call kernels also from code not compiled with nvcc + * + * Example: + * \code + // Declaration of kernel, implementation has to be in a file compiled with nvcc + void kernel_func ( double * inputData, int size ); + + auto kernel = make_kernel( kernel_func ); + kernel.addParam<double*> ( argument1 ); + kernel.addParam<int> ( 20 ); + kernel.configure( dim3( 3,3,3), dim3( 4,4,4) ); + kernel(); + // this code is equivalent to: + kernel_func<<< dim3( 3,3,3), dim3( 4,4,4) >>> ( argument1, 20 ); + * \endcode + * + * Why use this strange wrapper class instead of the nice kernel call syntax "<<<griddim, blockdim >>>" ?? + * - This syntax is nice but has to be compiled with nvcc, which does not (yet) understand C++11 + * - C++11 features are used all over the place in waLBerla code + * - all *.cu files and headers included in *.cu files have to be "C++11 free" + * - thus there should be as little code as possible in *.cu files + * + * Drawbacks of this class compared to kernel call syntax: + * Type checking of parameters can only be done at runtime (is done only in Debug mode!). + * Consider the following example: + * \code + // Declaration of kernel, implementation has to be in a file compiled with nvcc + void kernel_func ( double * inputData, int size ); + + auto kernel = make_kernel( kernel_func ); + kernel.addParam<float*> ( argument1 ); + kernel.addParam<unsigned int> ( 40 ); + kernel.configure( dim3( 3,3,3), dim3( 4,4,4) ); + kernel(); + // this code is equivalent to: + kernel_func<<< dim3( 3,3,3), dim3( 4,4,4) >>> ( argument1, 40 ); + * \endcode + * The parameter types of the kernel and the parameters added at the cuda::Kernel class do not match. + * This is only detected when the code is run and was compiled in DEBUG mode! 
+ * + * + * Advantages of this class compared to kernel call syntax: Integrates nicely with waLBerlas field indexing and + * accessor concepts: + * \code + void kernel_func( cuda::SimpleFieldAccessor<double> f ); + + auto myKernel = cuda::make_kernel( &kernel_func ); + myKernel.addFieldIndexingParam( cuda::SimpleFieldIndexing<double>::xyz( gpuField ) ); + myKernel(); + * \endcode + * When using at least one FieldIndexingParameter configure() does not have to be called, since the thread and grid + * setup is done by the indexing scheme. If two FieldIndexingParameters are passed, the two indexing schemes have to + * be consistent. + */ + //******************************************************************************************************************* + template<typename FuncPtr> + class Kernel + { + public: + Kernel( FuncPtr funcPtr ); + + template<typename T> void addParam( const T & param ); + template<typename T> void addFieldIndexingParam( const T & indexing ); + + + void configure( dim3 gridDim, dim3 blockDim ); + void operator() () const; + + + protected: + template<typename T> size_t determineNextOffset(); + + + //** Members ********************************************************************************************** + /*! \name Members */ + //@{ + FuncPtr funcPtr_; + + bool configured_; + dim3 gridDim_; + dim3 blockDim_; + + struct ParamInfo { + std::vector<char> data; + size_t offset; + }; + std::vector< ParamInfo > params_; + //@} + //**************************************************************************************************************** + + + //** Type checking of parameters ********************************************************************************** + /*! 
\name Type checking of parameters */ + //@{ + typedef typename boost::remove_pointer<FuncPtr>::type FuncType; + + #define CHECK_PARAMETER_FUNC( Number ) \ + template<typename T> \ + bool checkParameter##Number( typename boost::enable_if_c< (boost::function_traits<FuncType>::arity >= Number ), T >::type * = 0 ) { \ + return boost::is_same< T, typename boost::function_traits<FuncType>::arg##Number##_type >::value; \ + } \ + template<typename T> \ + bool checkParameter##Number( typename boost::disable_if_c< (boost::function_traits<FuncType>::arity >= Number ),T >::type * = 0 ) { \ + return false; \ + } + + CHECK_PARAMETER_FUNC(1) + CHECK_PARAMETER_FUNC(2) + CHECK_PARAMETER_FUNC(3) + CHECK_PARAMETER_FUNC(4) + CHECK_PARAMETER_FUNC(5) + CHECK_PARAMETER_FUNC(6) + CHECK_PARAMETER_FUNC(7) + CHECK_PARAMETER_FUNC(8) + + template<typename T> bool checkParameter( uint_t n ); + //@} + //**************************************************************************************************************** + }; + + + template<typename FuncPtr> + Kernel<FuncPtr> make_kernel( FuncPtr funcPtr ) { + return Kernel<FuncPtr> ( funcPtr ); + } + + + + + + + + //=================================================================================================================== + // + // Implementation + // + //=================================================================================================================== + + template<typename FP> + Kernel<FP>::Kernel( FP funcPtr ) + : funcPtr_ ( funcPtr ), + configured_( false ) + {} + + template<typename FP> + template<typename T> + void Kernel<FP>::addParam( const T & param ) + { + ParamInfo paramInfo; + paramInfo.data.resize( sizeof(T) ); + std::memcpy ( &(paramInfo.data[0]), &param, sizeof(T) ); // store a byte-wise copy of the argument + paramInfo.offset = determineNextOffset<T>(); + + WALBERLA_ASSERT( checkParameter<T>( params_.size() +1 ), + "cuda::Kernel type mismatch of parameter " << params_.size() +1 ); + + params_.push_back( 
paramInfo ); + } + + + template<typename FP> + 
template<typename Indexing> + void Kernel<FP>::addFieldIndexingParam( const Indexing & indexing ) + { + configure( indexing.gridDim(), indexing.blockDim() ); + addParam( indexing.gpuAccess() ); + } + + template<typename FP> + void Kernel<FP>::configure( dim3 gridDim, dim3 blockDim ) + { + if ( ! configured_ ) + { + gridDim_ = gridDim; + blockDim_ = blockDim; + configured_ = true; + } + else // already configured: a further configuration has to match the stored one + { + if ( gridDim.x != gridDim_.x || gridDim.y != gridDim_.y || gridDim.z != gridDim_.z || + blockDim.x != blockDim_.x || blockDim.y != blockDim_.y || blockDim.z != blockDim_.z ) + { + WALBERLA_ABORT( "Error when configuring cuda::Kernel: Inconsistent setup. " ); + } + } + } + + template<typename FP> + void Kernel<FP>::operator() () const + { + // check for correct number of parameter calls + + if ( params_.size() != boost::function_traits<FuncType>::arity ) { + WALBERLA_ABORT( "Error when calling cuda::Kernel - Wrong number of arguments. " << + "Expected " << boost::function_traits<FuncType>::arity << ", received " << params_.size() ); + } + + // set the number of blocks and threads, + WALBERLA_CUDA_CHECK( cudaConfigureCall( gridDim_, blockDim_ ) ); //TODO extend class to support streams + + // register all parameters + for( auto paramIt = params_.begin(); paramIt != params_.end(); ++paramIt ) { + const void * ptr = &(paramIt->data[0]); + WALBERLA_CUDA_CHECK( cudaSetupArgument( ptr, paramIt->data.size(), paramIt->offset ) ); + } + + // .. 
and launch the kernel + static_assert( sizeof(void *) == sizeof(void (*)(void)), + "object pointer and function pointer sizes must be equal" ); + // dirty casting trick to circumvent compiler warning + // essentially the next two lines are: cudaLaunch( funcPtr_ ); + void *q = (void*) &funcPtr_; + WALBERLA_CUDA_CHECK( cudaLaunch( (const char*) ( *static_cast<void **>(q) )) ); + } + + + template<typename FP> + template<typename T> + bool Kernel<FP>::checkParameter( uint_t n ) + { + switch (n) { + case 1: return checkParameter1<T>(); + case 2: return checkParameter2<T>(); + case 3: return checkParameter3<T>(); + case 4: return checkParameter4<T>(); + case 5: return checkParameter5<T>(); + case 6: return checkParameter6<T>(); + case 7: return checkParameter7<T>(); + case 8: return checkParameter8<T>(); + default: + WALBERLA_ABORT("Too many parameters passed to kernel"); + } + return false; + } + + + template<typename FP> + template<typename T> + size_t Kernel<FP>::determineNextOffset() + { + size_t currentOffset = 0; + if ( !params_.empty() ) + currentOffset = params_.back().offset + params_.back().data.size(); + + size_t alignment = __alignof( T ); + return (currentOffset + alignment-1) & ~(alignment-1); // round up to the next multiple of the alignment, relies on the alignment being a power of two + } + + + + +} // namespace cuda +} // namespace walberla + + diff --git a/src/cuda/doc/cuda.dox b/src/cuda/doc/cuda.dox new file mode 100644 index 000000000..96652834d --- /dev/null +++ b/src/cuda/doc/cuda.dox @@ -0,0 +1,80 @@ + +namespace walberla{ +/*!
+ +\page cudaPage Overview of waLBerla CUDA support + +\brief waLBerla CUDA concepts + + +\section cudaField Fields on GPU + + +\subsection cudaFieldOverview Creating GPU fields and copying them between host and device + + \code + // create a CPU field and a GPU field of same size and with same layout + GhostLayerField<double,4> h_f ( 16,20,30, 1, 42.0, field::fzyx ); + cuda::GPUField<double> d_f ( 16,20,30, 4, 1, field::fzyx ); + + cuda::fieldCpy( d_f, h_f ); // copy from host to device + some_kernel_wrapper( d_f ); // run some kernel + cuda::fieldCpy( h_f, d_f ); // copy field data back to host + \endcode + + Similarities and Differences of CPU and GPU field + - cuda::GPUField corresponds to field::GhostLayerField + - fSize is a template parameter for CPU fields and a normal parameter for GPUFields + - CPU field iterators correspond to FieldAccessors (see next section) + +\subsection cudaFieldAccess Writing CUDA kernels operating on GPUFields + + \image html cuda/doc/fieldAccess.png "Accessing fields in CUDA kernels" + + When writing a kernel that operates on a field, the first task is to distribute the data to CUDA threads and blocks. + We need a function \f$(blockIdx, threadIdx) \rightarrow (x,y,z)\f$ or \f$(blockIdx, threadIdx) \rightarrow (x,y,z,f)\f$. + The optimal mapping depends on many parameters: for example which layout the field has, the extents of each coordinate, + hardware parameters like warp-size, etc. + Thus this indexing function is abstracted. A few indexing strategies are already implemented which can be + substituted by custom strategies. + An indexing strategy consists of two classes: a somewhat complex Indexing class, which manages the + indexing on the host-side and a lightweight Accessor class, which is passed to the CUDA kernel.
+ + An indexing scheme is very similar to the iterator concept: it defines the bounds of the iteration, which is not necessarily the + complete field but could also be a certain sub-block, for example the ghost layer in a certain direction. + + + Let's start by writing a simple kernel that doubles all values stored in a field: + \code + #include "cuda/FieldAccessor.h" + + __global__ void kernel_double( cuda::FieldAccessor<double> f ) + { + f.set( blockIdx, threadIdx ); + f.get() *= 2.0; + } + \endcode + We do not have to care about indexing, the cuda::FieldAccessor takes care of that. So this is a generic kernel that operates + on double fields. Using the cuda::FieldAccessor the current and neighboring values can be accessed and manipulated. + + This kernel can be called like this: + \code + cuda::FieldIndexing<double> indexing = cuda::FieldIndexing<double>::sliceBeforeGhostLayerXYZ( field, 1, stencil::E, true ); + kernel_double<<< indexing.gridDim(), indexing.blockDim() >>> ( indexing.gpuAccess() ); + \endcode + In the example above we only iterate over a slice of the field. Of course we can also iterate over the complete field, there are + various static member functions in an Indexing class to create certain iteration patterns. + The Indexing class encapsulates the information of how to launch the kernel (blockDim and gridDim) and holds the Accessor class that + is passed to the kernel.
+ + Two indexing strategies are currently provided: + - cuda::FieldIndexing and cuda::FieldAccessor (general, but slow ) + - cuda::FieldIndexingXYZ and cuda::FieldAccessorXYZ ( optimized for cell based iterating over bigger chunks, for fields where xSize bigger than warpSize ) + + \section cudaKernelWrapper Calling CUDA kernels from CPP files + \copydoc cuda::Kernel + + + +*/ +} diff --git a/src/cuda/doc/drawing.svg b/src/cuda/doc/drawing.svg new file mode 100644 index 000000000..4e356d3f3 --- /dev/null +++ b/src/cuda/doc/drawing.svg @@ -0,0 +1,285 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<!-- Created with Inkscape (http://www.inkscape.org/) --> + +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + width="744.09448819" + height="1052.3622047" + id="svg2" + version="1.1" + inkscape:version="0.48.4 r9939" + sodipodi:docname="drawing.svg"> + <defs + id="defs4"> + <marker + inkscape:stockid="Arrow1Lend" + orient="auto" + refY="0.0" + refX="0.0" + id="Arrow1Lend" + style="overflow:visible;"> + <path + id="path3859" + d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z " + style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt;" + transform="scale(0.8) rotate(180) translate(12.5,0)" /> + </marker> + <marker + inkscape:stockid="Arrow1Lstart" + orient="auto" + refY="0.0" + refX="0.0" + id="Arrow1Lstart" + style="overflow:visible"> + <path + id="path3856" + d="M 0.0,0.0 L 5.0,-5.0 L -12.5,0.0 L 5.0,5.0 L 0.0,0.0 z " + style="fill-rule:evenodd;stroke:#000000;stroke-width:1.0pt" + transform="scale(0.8) translate(12.5,0)" /> + </marker> + <marker + inkscape:stockid="Arrow1Lend" + orient="auto" + refY="0" + refX="0" + id="Arrow1Lend-7" + 
style="overflow:visible"> + <path + inkscape:connector-curvature="0" + id="path3859-2" + d="M 0,0 5,-5 -12.5,0 5,5 0,0 z" + style="fill-rule:evenodd;stroke:#000000;stroke-width:1pt" + transform="matrix(-0.8,0,0,-0.8,-10,0)" /> + </marker> + </defs> + <sodipodi:namedview + id="base" + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1.0" + inkscape:pageopacity="0.0" + inkscape:pageshadow="2" + inkscape:zoom="1.4" + inkscape:cx="310.32854" + inkscape:cy="651.81828" + inkscape:document-units="px" + inkscape:current-layer="layer1" + showgrid="true" + inkscape:window-width="1600" + inkscape:window-height="1180" + inkscape:window-x="2558" + inkscape:window-y="-3" + inkscape:window-maximized="1"> + <inkscape:grid + type="xygrid" + id="grid2987" /> + </sodipodi:namedview> + <metadata + id="metadata7"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + <dc:title></dc:title> + </cc:Work> + </rdf:RDF> + </metadata> + <g + inkscape:label="Layer 1" + inkscape:groupmode="layer" + id="layer1"> + <rect + style="fill:#88bfff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="rect3757" + width="240" + height="60" + x="40" + y="53.076469" + ry="10" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="50" + y="92.362183" + id="text3759" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan3761" + x="50" + y="92.362183" + style="font-size:24px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Monospace;-inkscape-font-specification:Monospace">GhostLayerField</tspan></text> + <rect + 
style="fill:#88bfff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="rect3757-9" + width="240" + height="60" + x="40" + y="182.36218" + ry="10" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="50" + y="222.36218" + id="text3759-2" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan3761-6" + x="50" + y="222.36218" + style="font-size:24px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Monospace;-inkscape-font-specification:Monospace">cuda::GPUField</tspan></text> + <rect + style="fill:#c7ffea;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:0.71999997px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="rect3757-0" + width="166.62854" + height="40.114277" + x="277.37146" + y="126.6479" + ry="7.1999998" /> + <text + xml:space="preserve" + style="font-size:28.79999924px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="300" + y="152.36218" + id="text3759-7" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan3761-1" + x="300" + y="152.36218" + style="font-size:17.27999878px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Monospace;-inkscape-font-specification:Monospace">FieldCopy.h</tspan></text> + <path + style="fill:none;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-start:url(#Arrow1Lstart);marker-end:url(#Arrow1Lend)" + d="m 150,182.36218 0,-70" + id="path3850" + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" + 
style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="160" + y="152.36218" + id="text4482" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan4484" + x="160" + y="152.36218" + style="font-size:11px">cpu-gpu transfer</tspan></text> + <rect + style="fill:#ffe693;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="rect3757-9-2" + width="240" + height="60" + x="42.142857" + y="293.07648" + ry="10" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="90" + y="332.36218" + id="text3759-2-2" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan3761-6-3" + x="90" + y="332.36218" + style="font-size:24px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Monospace;-inkscape-font-specification:Monospace">*Indexing</tspan></text> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="293.57144" + y="316.64789" + id="text4482-4" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan4484-2" + x="293.57144" + y="316.64789" + style="font-size:11px">represents iteration over a field </tspan><tspan + sodipodi:role="line" + x="293.57144" + y="330.39789" + style="font-size:11px" + id="tspan4550"> i.e. 
complete field, only slice, only ghost layer..</tspan></text> + <path + style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;marker-end:url(#Arrow1Lend);stroke-miterlimit:4;stroke-dasharray:1,1;stroke-dashoffset:0" + d="m 150,292.36218 0,-50" + id="path4552" + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="81.428574" + y="275.21933" + id="text4482-6" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan4484-26" + x="81.428574" + y="275.21933" + style="font-size:11px"><<< can be created with >>></tspan></text> + <rect + style="fill:#ffe693;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1" + id="rect3757-9-2-7" + width="240" + height="60" + x="42.14286" + y="403.07648" + ry="10" /> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="90" + y="442.36218" + id="text3759-2-2-5" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan3761-6-3-3" + x="90" + y="442.36218" + style="font-size:24px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-family:Monospace;-inkscape-font-specification:Monospace">*Accessor</tspan></text> + <path + style="fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:1, 1;stroke-dashoffset:0;marker-end:url(#Arrow1Lend)" + d="m 150,402.36218 0,-50" + id="path4552-9" + inkscape:connector-curvature="0" /> + <text + xml:space="preserve" + 
style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="81.428574" + y="385.21933" + id="text4482-6-8" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + id="tspan4484-26-6" + x="81.428574" + y="385.21933" + style="font-size:11px"><<< can be created with >>></tspan></text> + <text + xml:space="preserve" + style="font-size:40px;font-style:normal;font-weight:normal;line-height:125%;letter-spacing:0px;word-spacing:0px;fill:#000000;fill-opacity:1;stroke:none;font-family:Sans" + x="290" + y="432.36218" + id="text4482-4-0" + sodipodi:linespacing="125%"><tspan + sodipodi:role="line" + x="290" + y="432.36218" + style="font-size:11px" + id="tspan4550-2">class to be passed to GPU kernel</tspan><tspan + sodipodi:role="line" + x="290" + y="446.11218" + style="font-size:11px" + id="tspan5222">can access a single value or cell , supports neighbor access</tspan></text> + </g> +</svg> diff --git a/src/cuda/doc/fieldAccess.png b/src/cuda/doc/fieldAccess.png new file mode 100644 index 0000000000000000000000000000000000000000..d6dc372b737cea71bb0e4128f11a8bf72edafab3 GIT binary patch literal 46057 zcmeAS@N?(olHy`uVBq!ia0y~yU}|PyU_8OW#=yW}D*0d!0|Ns~v6E*A2L}g74M$1` z0|NtRfk$L90|S3N2s2*qx^<F)fkCpwHKHUqKdq!Zu_%?Hyu4g5GcUV1Ik6yBFTW^# z_B$IX1_lKNPZ!6KiaBrYmd8v!F3<kqeCys6O{Om9LjOC}??c1ahGl=>vi9%FwRd-i z?q0ig?b^~St4g<&X3K`Y4ijN-dX#Ybp7f+U@B1CRe3&>6D%9LtFRnlN`MaIJ)f9df zoxf}A<i(lXBB0AqvUS6jS9z(~_tzWfFJJ2wnUVa3MQPXih}%{wnmRg6pEeoyF>x>y zgn7-(dbgva<HbF}wn>hPF1FgzyQc=Nm3#Q|w%z`~+xN<Nqe`}Y`1YD5nmygP#nUBJ z(Ns^y@cMaWttUQpAs;kTtAcu#hOB<(t+{{t(!WbRTwF}s;*2y}W+Y{%e)e8HFL{mn z+5ac6?d#h;(@A+z_1mfgb~%4a!s6=k(h?Hm&K(XqRZx7~#l<DmTU~O?w&I9|0TE~B z{CUMSn~!<<EJGb7qv?O<@Vxo)uD-SL_w`jFrw(llI@SEFud}0LRiBxph?CF8;*v<~ z^3n}!C-0s<^RSXqG53wlLecwY&RwH(;`8L7Wizeil$2IUaXA>fU$}JY*>p|rFhfBV zB?gD<f&L3-{!ggZ&fcB+tgjVhAQQuyoHt?H-WlK9m3yD_Szmj{s)?HG|0b#H=%{fj 
z9-Cz?1~y93rFvyp{|t-o?$O2U&MsHGC(l+tw(rBz-mP;FKZh9A#^hjrK27%1o4I$+ z$Z`vYI-lLm=bl)XSFFhLT|!l9Rig+O_vt*}TVJ=Dmn*o0*zT+{Yn<A<SIB|A__&jc z%d+JgHf?-;eM4wCcQLz@%hl=I<Ci-z#GaL97ZjX1Z;^ri^49I~>cz($U9LtfHnWx0 z(OLTkD(SQ~EHd)u2Q5QE$@JXU3L;|D`kwVQcXaspC%$9o=;-J;Q8R-_NKjDlYLX9I zXGcdzmZ4g+i;IiPR*A`uN=izLqIym$2nq@Y>K;p((9zMcLM++H!^OoVq}xzZMM+6% z)lr!CY0vl?R+ydg*vA{1*m$py{rRp9+aAZ>2c;#$NTV4dvRxeugim<&wy#~x{mLw- zu}ydPfn)pn4*s&=Ya}r7;PMCdIcjSAcO42zykq{U*l5DU5^079LHh+O>R;@hE>X%m zVN*=0Y>aKf`zK*pbN%IWI_7*);i-L7%Tx2CG`J<>ivIdOhnk29_dcHdT<^Bs(0}HV z{?PUP2hV<Hzqswbi%)%ev*#@prA4!rUpo2Vt+e=tud~GkDxWTio7r0{`(@S#reFTm zq3@d`R_ZitGLY9wO-yw**!`OEx1f9-t6$!{7Zw#!JJ0Pb%8@qHox{pd@oRN+#xwgJ zTFz@G?yFm)dnGG$=c~TTf81YYTdluhwdzof2KVQMVVb^zaVu-0N}HW!%in&E7Vn?2 zhkxs;fY0@34sfUl3I=ZUQf*dLFMF{4`!a{6S88tDxL(-q|5{Gt#Mwh{r5jJ2KY8Hc z*X{>b*Cg+pyyW-AqxE&}2j)%9xUnj_w{C8VRd8JN%Gxzyt3NywICn!qN$K;759$F< zE*s1Qw`?pwbohOq%W1jATb>4avB=+V+rO{soJymR<mF}GD{Ouon-%mr;^vL1H_v6i z&UacHHhs08pkSbH;KP)H*XElS28kWGbc*eKO|b;;tnzD`;!X>4?{j9{S}4Ei@?<Sp zjfsY0Vb?Ykm#i$YKK5+8kf7kz6j3j>$A_B=ULQ(iaF}h!|3bA%_2%`$_S(*!fybUa zPf_~wDew-D4U49#)V3E*Wz5SC9J;jX%I7OtqKEY49Pg|>-NDb_*1Xx;X7)Ds$=_-p zzIhnD?t1a-Ly0bI^PRG4SgxFMve;R@Fz>Kf#>H9|nOK`IuhiXQot<1dzEwumurMg- zYx9Qp<z8KVe@pUzBXRwZmMKEp*UxwV@if`PzsIetO6bm>S&Ql}mt|a7#2xe6+^ixl z<%4F*vLgqNofE8!ZJTz>y)ZVq-OBB`X`0$`C8ZxKkMd&Os*+4>q_($hpKo7d&D$cd z=*l<aN%_iJPk0y>gr2{+VZ#=;tQZR>{x!~7ITCAf-afp!Mb?V9^2U?ZImu4v6?GN8 z-dFrp%MI9`*j==J^5mIK9Sejzc9%+OZqGlow^4r2U#o?Y&kt_cu*EF(MAech9J?!6 z0u`4}6q0UhTf1xF%>M~HDoxj3s_X1n@oYtaQN_m#Zf_s!Da)@e5h&cQm;7_izY}M@ zE?eHKc>R6G*N<Tq7S~NH;=;0Q%xX+z&6)XMPK~^6mGEfK(+~1ihcos|p4`6i-ClOV z!lx|<ixgRw?fa<xr~T{|%et2r)ZWB@T^ILv$&1$!zt6eseSE&YT&DN-H!~@B*Oqv@ zwo-W?*R-NF2a8${F)7dc#qxLg-euD#xW-sE#-4uW$iW(SUxM*@6)*3$g+XEoU!{{L zPM+9vZ*rW`44!#+|8ke<%x!Mkle2e|*j={=3(c<|I=FP+#p{BCB}}jH)eA6uc(+Z3 z<xO1YUi<wEw;McpAAj$db-k)Y?sBER+@EZXzw0ks+x_Y6H~2Ae`{}E0Cr-}2@#&`U z2|w>E+SwNtai{Is&zTpuZ))Gw35Wj3{P}m%C&u#E^lvv_%AQV4%xsO6fBiqu$;Bnb 
zqjl237uk%rj-Ry(&lAmws$mJ+XC~zL?%oT#cab-4SU!5SGPZ-quC-&qV~ZWnn0x9g zSKh0vy!rCSx0mNmc<keCJ;Wr-X4}%HyIaXvmG9v3?TUu4q|EA!ge_lszA&!ryZ19| ztJiFfsHoeAwm)Vt$+|3jPR`Dwm)qUvMP<vpo85or`0Oo~-uiOk<!4_T{(5w9v@OoA zZRYlPwWB)xm93GAv8C9|lN&db7d6LEUs#a;fL)wlJ|$g6`OTyKJLER-SJzd^ZZy|V zymk9^$+@>SHDcfXePIhZ#Wd&scWt(_6_*p_71hg?)RiR}c=F0WJ)eI5(V27e85nqH zmAjpuxA@_UM~y!|Xj|;h*L6DoulIfYi_V>^*B`iaYulOYA{7a;W%~8JVP@YxWWM9? zcUwDQ^30~&3!hJ%CsMpUKX7JLiS$C_tlFyMMH<|nPSyT<__ce(_vhve45iX$?zi^| zKMdM0IBAiSU`2v#lGFLbm&UOM<-Qzq?%4QB?Vci}{fyt;<w7BY!*9mm3p;Z2Q&jFB zdwO{G$J^Dxr%aXnU0gyIrWkB|A+&PuWNDqXIa;Cz)^@vJSoK`u%%6mAvqTA#&%SGF z(}kM)^AF#=t8MsbrT>>_#j^jCJIw^=d^{+&<*l`2wAqh|+c&=YdXZy)4G(+$Uyi$O z_AARf4efM1td%T&Ox$i*@hrM(qw4*AnFU8yS6+XZaj$g8x1ZC_&vS8dF^G9(erSjI z{*^yw7mCk$ZF=O%Py5A}FG&94mP^~$nml{|v2&@#g%2N8@yz<E_R2hSwQgC#y6$bY z>JKhG^yRp*`Ba8cMcw+eK)1tTcON{x?YLa$&87XfTsHjQ5^nhEWov+j&X%XOYUdB~ z>wk6hdZR1zKbe!wCqCd=w$+Yj&L(d!@L#?cwY<19er?dBH}B$m=1reGvFF5tV7Jq< zF|Q8Z-97VO;r~SAQny-tnTYUrDF#2k`M!z&D#x(k@`QH7r(5Gogzho&>mT@J$vi=( zn3+9ne}89jy-xV<r=mqkyUhe^63&&sFiqUHZO#2Fl|NoiZJbpr(b>^)Dq!L5i=z4* zg{z`}3hK8WVtO3^FYI}92nXvgpE}8gXF(A~XME=tbA2u@$oYJop&`un_zRULhJ@tz zErt5Jj0-$P{Bp1VSJ+qkxanKr^N0NsJ!kLko*5?;5^IumoBQrDo79r0+4D7vw{Vuc zxViu9sonjxQVbvmCQk2rH8p*y>VsD=?=m#xEuMcxb8=A9@u0IOR`|b`llXc@X|9U^ zgN@YoYic*zuj%SsuWN8xc;In&+3T}kLn0y;9p91OJFndMgTS=2?^3_KKJe=M={LXg z-u_Fp3pvHK<63@?=jUm!duy&0em}M%U7VTW+`Fj8GgB-kf2+^9>%`!&*KKWykRwAu z(dQ!%BHz#0Vy~pMYUevE$tgi=Wp#~Brt2I15DD977W95~h5jMy%Aci17Ry)7^Y?!H z_{))uxtorx2zdL)sb0A=ruo_2hc9o-F$jnjMXlfSM0Lx9<l8*k3hjlR7{9a4y0}4x zdG*fOrE-gE@1K7gyEMEm@xRrIP_KxaHx^%>@X{((vh(7tlwH4PELUiGD8E?jt{cOJ zm`ncV8v-8t{gs~;ztC05Nh2XMHTlcxHLf1z-fP1izS8tvbSLu#&tk*v8dA1lt3y3H zILu=IYpEu6Ez2sA`7+_4)!SdbTGy9WDk&}6z0XIdg(=LeOykUX*SNA>ag||Ndk#+2 zSQ#drbN<F7*55*%X&?7&O**=|obkZk#`zZ`?(V4(EPrhj<b6f|+7@Sfi(M58+PaL^ z`(`(DvtM7FyT3EzZnn%T)2vLBgDdma&Ar!r;O~A#Q#+=YoPj6KpKQ?C<r?jnC&nOg z=axH1V_x#SBlorM9a?&Lw&B00OE+vQy)bS5@95&r9fudJteXD9JFf1``IAScF0OZU 
zVhYOoHBCq;=(ha=$;OheIYF<oGjD9VeRuB<rMJKQ5A}RE4}Wo|%cGb3`oF@(VXJ%g z)p>AcN3)AONmaIdw(R6Pqsj9`7RzRqem2`<arD$NzS28;Ckj=cR;d*d3_KgU(6YJA zFHhr4zhl^{f3N)f?|+l9jxOdj|MKQOhwbL=SGMKI&f%5Q-^mnqukioPD?19_CO_ZB z+q3QMi>UbZbtmiO>*|H;{#URWO-d4wtl^q?aQTMxpG-GySTFQ_|54({hu16aZ;4&` zakjT#8dumE>8{(?nYSKq2{>@<p6B{?8!HQ9-R=7RuD<8&m?x$ue{W5!WZmy$tVizc z;c)mbWXPc6)}AH*KE`Z&%*T6{yRtjH|GGWRI(=u-qJPzY-OY6Lu3NtBOzrQP79Lfz z<Xhp#^E(v%^5%h>=qr3gz1Uv=+~VLC#IxzLkiya{o_Fr-skvBwN}IuCI{RraHtj`E zR?MB!<}TCi<(+$c`(uVP(ZWrumzO=f_3|#q?jQO=kCi9PpWJzX`L#gF7k`^yFZq6y z$|Ws2;p^<xdEdTDV*7gbL#K}MIj8OI=4+N~>Tc&35&K{3y}y1R`=WL83~%*K-Yp*W zRi^pD<J$ro(x;k9+H5a3WjMR^m)hIkujYmq)g|m-`MKnm$ub+yt(s<i`@}`W{_p*l zmpo7F`l?vx_lq@unVUW0oAR~3xue5lvD3)~+vnTovB@=gMRwenE&lNB^^;c$r=B|c zdH>ld#*^}-Wo1u#bzZ-+KE391rgXJ>iHwc!ZdIQjPm?)rZ036YZJkqMo!-A+pCb3{ zde_Fec>SI!H==KOe-wVMXTFO;O;_>KgO8JT_18*)YVALt0#)ws;}q|A+fh(p%W(Mi zNrqXj+()bW>epTI{%RU{^7{S62O%c!t=`Q0_Wsew`y5$evp((>-yHP#y{m<bOUeAz z7mn<>R(mX4e*W#n`+qE0HFZvP*sX7po`%;M7u+dVVl+{<2#)*BD%+*@OdV9?I6s`! 
zC9Waz?CF%<^G|71X70$n&(3r0<XIox>s;5w?z$h^8RzVwS(x`eE5>@S^v%r9fQX1i zpF1Mi&VSUH$aAzke&^{I@9WQftw~5t=09MvK7Qeb3x3l*I(fA>*UjB^@7Bc|Q*N8{ zUei2t-mz^W@5#=N4zE=KMmM%N*S~$ci-~{D?&rz^rP3zX@7;U*XWv$~*DkGduH7>_ zx4g7}_q)ZV?+b4oKXho(lv)2(XLAZ3tm=+k<#h6N{i&v!=F)$6f4J_v6{Mkaq4bI{ zgGsj=Q`veJhApoQZ#`K1gW;^r);8VUOc!$Y)I6VaZk8WMW7ytf3=DnwKiOJ;?wp#p zxuWFjwWG>UU&Jrm`=;jO{QJDuc9vvZj?G=*>*BICymgYJB8%LN#XGe<{M?M{DmL~X z6zuI;a(im`ISy@U>o2t*>KPhdr~f~;^ziJGQ^lJvZFp>bc8_bdy7`xR7SZc_CkR^$ z?D%iqEI%V?RpGjf!>QMQ^`73dzTj@#+3s_bK^5~0Gpl8fuC7`9ZR0{)k@YgSyON{# z>6KUCJ!ZqHDB|JY;q;;NEyJmOXCM5SzOPk)36!}%|NbvoBY!nZbe+uYuDAF1g}jS+ zC%$<8U!G~#wt4jSA9&{4uV`+3HDT}U#{UVA)s1<lO%170pZ{WYx?qLF=C%HrY<7RF z7|!Yn^ymHaTCQWO$i;rGCScxlM;Dinjh4*~&-7$K&FA{{72GGc=RZ0fx%R=^>zsDE zRSs)|l#G5%`Bt>7+1r-G_2uI1^3~^`OfSs(-G9QP>tGxI^2e9#r7tc>4NqBmMDfWJ zzqeYa6#MtB-fqgppyJY&aQ+kPjg2LIe|GV>?sd}*JbqVtre@)enw0KupYDH9x7RbC zwyjqEz}?-Flk)E$c_$x#^kBSwOa5`j4O+KWrT!3>&vFmqjr@7*gwgg*FHK6=mhD-# zliS^P{<Mti`9e*7|2qG!zUTVos_}=f@+_&2a*LY$m46&5yOMiDMl8&%gP&W=xK6y^ zE#qG9j<SvY&Bd`>UYTyYQY^LgZL8hn=MQd7%P;uTugYP$QMN9&ZBgD~u@mP{Is|<w zbzzz75apDzHD+RM+Vk@roVLr0b@pdy&v_tx`)io}_xayeos0E}v7Mdp<TCfh?3*?* zR(m+N>&ZEumRlTfez|JMDJGTUcN{`aNUFHSUcR~M_>9KO;t$UB&7UxRZeQ>GGe_n< z<=%2zuT<)n!7rbx$mMn2dCCuNXDZ~yTJ*%8e)cfzfuc#)ZRwdOw+ohh(SMS<{P5Pg z<mTI2f`WomLms9GFtDAi5Z3!Do&25u^Te4#?JHKNmxQh7QoK<5WwONAH%XK8|70h} zvloZOfD*>TS1V&VtbVCC_5E9^yS=#N;F6#Ai~lxgvYlW5_#JO{H2ag3WiNVlm}a$P zyhwik`p&vD65QU|prZH9&rHU3^X_#1nd8$k=gvyq_r^b;22FOHx@p51yDM6Et%RTJ zy4U52tz&=VeB$I>tG`$CFNtloyVvE*Ew8)zZqnW3c58jv{<fdhx&A%-&71WPZ)Ym} z`}N9l$&t2)XU@(4`pRO*73bOa9`nx0jl0hL?ct}1x@@Nxt)J)m<?3du<Deek?xnH! 
zHP;_l9%`Nw_lITDBIm8EH+XN_xW2jUuliKU{#x1cJwF~x=KLuUTjb=YtgOi<>H0m{ zY@5)}kFL8nW<J!3vD@VOJpI6_V|@Mkw|bpl+oggAh*E_Zo;2|$rsi?PNKM_uaGm*^ zvepxmO_zioK8cc_x9{!4yLawCxa-PrR;gppn-Ytim9N&E(@|UM5^?d*%BtR|gD1~k z&iH@vI+H;4W2Q%knjSoe5x;l#@59Tx+7DVUd2uYZ<HUo>9XreTBsDjlIDfLGyPf~e z-q{O7*Kd3(b#dq9O$z)gKMJ1k>b;)#`{DxE^t~5PuDMe2cFwU+2H))NTGR_%3_q`A zYRB}=FE8WDGWDnuc}8yU*GBgrPtudU)$vu{uUUYp_<GBWJB-V8ep=4#;IwW3@j=_8 zXAhUQbhSnGj{~bC*-bL<Z@&_F{>>Jbw;DN<ztx`J>g&5PTD+ej=+MQh%yPZAr~I~Y zFy4Rpg5Atx73SZ~cd0MiWFT*Nok>7muBW9Vfn#CA=5~G?sqJpY{~b4M*s^c+jUwaV zi4v8fr<~5#1Uz3f-_v=?lnySt))lMMCkRQi&A)c{;+o3}7CWmIzg<&(lD6c)!KL%g z?Omz;=)kM*g-^bpHxqgC)Fx(STSv!=TZW!S9^cJ1Rj1ip6)fDYcX<8j0GqEz>x$c5 zuJTNFR9ZCa^}ij@r9xLu5WWp+`!OuI&hq!D8E9ayI5g{=!rlEsi`OYy>k3|H{`UBc z-q{#?bN%?ax}~5Iz-P%u5#Dd3%=p9o-(IpSFZz7sad-LaJ6V08(L+16=B**N^QWEg z^KWV{jy1`=FD}bw%VNHM*)zWGj*gCZhHA}QugnnNXHw1qvW>TZkyDVE;YBWP?bmD0 zOtFBBa#jWP1Vv6bA1~tKVmjf_rBlz6zHDWHNV#12Xrp=ckE^3g2(#v=;?4hhx$Hs{ z-Z^xvs0z~Lo**Kd#olmIK`@YaNy8LI!Eis3S=ItdtL7!;$2AFv{bX1-8#JtIWXswx zIg>NcZ2NllS=M4otJ==J{~el{m|E)OrFmAC8#M0f<R!_iX&?3f;TD;xlbBmvzRW+N ztzKC5yMI;Foj2lVK#D?nSs7MbUFCmy@}m5|o4+Yf-N~%9=#^vSg!ZrUzUKp1Pg(D+ zS=l+GV?|pKL&KDy$Qjd(-+uUYbk&2$x6jB53(kxLHEn+U(M&C?N}4dGsB`CIMWscn zl)e0zg`9l0Y}xj6Ywdr{YmcATq~|_=k#y|Bi77#kl^;ay-?;Peg5M5LrcS=`;o_G& zZ+f0AXu7k;Gw4(XWN_X6IiE_<sr^$}f7XTf<y(I_dgfW>biRD2vZ_ftlaz(6zT94Z z;;fHpxkN~>>ivD4vG!+zb?Z|T9HWxH%x2vgX*+{MNbqXSp&Lw&ibs61qubAOf7)a_ z_s*;5zqY(nl-m9_H~aO2nto@M;2Zl&_-=iX<KM66d`6ZlR9SQ5_LLt_AK%<)t{<Vz z{Yf-z+qdPPi;d);%Qz}6x^+0iXvJEE7U>2(hFwv&txANsxQ|y0NzUwvT3e)b=JdOj z0TD|gZd%PTF1HDZ_0Zt{v^eBcQ!v9h#vM9KpG;$NaoOrK^Q6Mv|1ARfOg~s0#2T#b z=z|hvm*%N;PNoN@EUM3C{4v{D05ohJH-qO|ffGkKgKYP_Rg67m$rhm0c-BC2&CfRG z6WUcdya}5mlNFVe7G<@aOgQ_Ufnm4Md25kgF(E-gK|{$kCtC!V4oEO8I`S}v;qVz@ zK|w*mP;(!)>}`4%WJOsRsx#cUL8Iv)TNW>jGzpr-V6ZFW3TOm9%Tle`^tK3t8C&1& z=IAp5f`WoGc&<e_ag;OUddo~=nBv;M^C*v?pde#{QN-ll1NG6TL>N3;(>{7An#}eu z?v_+iQc5z4X!dPM`{-fUaGg<wX$G$X^DIH9JVR#}7Z)G4Y$e4Ordbmm86WU!KE3Yh 
z8pu$^?9u+L`n+x$D2rNp7qdHrE|O+YXDIeoX<}f|OkE+cNW4Lq;TYqMl(ru$*feHM zza_BicZ5)=xg^7(B~vclX%q+z_hHLkCdlwled(WL0Sl*mstR=45yzqU!+X-H)AufZ zWK1u={!?q}sqLlL{igLsnQTtb^0*oqmDHtWqayOPR!U{Xlv8g{7IMmaXmX#C6%Q;u zl(Hf1mikhg&?r{Tr@V?soEdBwj@+NU;YFQlOF{AK`24N^8{%r8>&3pfbzu9(oz6<T zv@<(T%2n6jezg7D=B1Zg1%j7OVSLutxnh@qjL0h47J+a^8}+3&IqxD~{8MTvC|>uv z?B&$;_N(lRo;Ckrp7btz;zIL{35OQ1-1u;!|C6Ur@=VKZT&^;TPY?)PdFR#j=-=mF zE%jkp>6US2kz}3S#?x1C-p!pUaOmuvj{>3WK5U&QG_2WwuZ(5;`u1k}83VPZkYtNp z7Rd)c>D#=jy88d@osBSM>f+C@$N&4p+&3?v_j2`@0B-i@R(D$j`kwW*p13oEM`&VT z*4AB)vHAYR?T#U;R^Q$xa^71$x8tOOprl$$#;paB!XSy%KkQ$M%8Cl#WxuoNIMWM~ zy}ZKnwOH6QzNQtcOul>;{pQy<_lzw2#GRn2nB<lRZI?nGd)Dy<tv&vq1@9Ej<5<>? z6;U1iU5-gUb1r_e%G1lZvS{YLk7ur!oRKpTtUca)z*v57+)Imn@gEe+*56!F`|ees z`2JX{qU&#Se%7gO`@cs_J};tAZEf_6sjH`WwXn87y7kjr<IH(?XZ!dc{4ZF!PyhaZ z{Zn83wZzYtL@VAN*T1kXUA(y6*(G(y|N7SgfxRD&f7jkv_AzMj)4%FHhaT;}_ChvX zGjvDYN3K6sTMRvAY~6UAV%Ps-XfWlTu3)OT(&??r@nesF9o0T@`s7vZ-IbOs+-i1R zGiE=uI)3-n^~3VpnG&+|!x!4E?!MBZ60^U8^`_*^XMC+4ry5Tx2!?7uo<FOTuXfjK z+3z`bE+o`etGn;n`zqSxOLo?zf4lkKwXbXb&B*XC>Yuhz@MWJ%c^el5KmTxPf~SDf zgR80Y7k)l)Pr&KHsh6K4em)cr(44CFw!dSAjt^Vsil~=2wzIq0{|eaoI{Nmlgfq#P zR~aZ>{`GQh>xt7Rk8U~~{m_2F_HPxB!_FG!+;WMC_;}q&@NRUBgOkhmrTXzTvidx8 z7X1BH`fyUv>-1&I9)1>&JA8(Z+2F=XO}W=bBCqZK%LSeCI$refh2|cU5~H@AX2&Pn zy1hDMsq^yH&Tfn92k~3pUW`;1)JaQC=52NQe!ZxwNK4GlR{V0*_k#h?(<-*TkI0E# zSHIKdWtmKzgaxDY^gk{2+ut25{(CHOMz{Pt`40?!|7$&#=Vd%d-D6O|WLEoo^FfRM zzY2Lls>A)}>%WgqxH8{<&9TdS-4a(68{6Ekw^<j@v8rUYOG?xq;VCcVgq?mcOYkaq zc<nrU=i{CAHqyTxR)#E8(C6QMJ*OpoF`L_Kx7huy;_UuTMKMfl_7-w~<t17lY;OGV zZYzI*u?EZgbwAe~DrAwd>Dsw?`Qd{%ITy=S3beAe{&@A&{Ncq5(d+X*%yeLIxL>Gz z>S|e04AZ<fc1)dy8zqdsGV;tY;7b1{fAG=sxTC$^2WR~Mo0NO^&m9{UIm^Z?Eh<Oc z-I7l@3t!kZ+y39pJ+j5eZF*Xx!xsH-31nzUdmF8+^+oNPUR;;v=J^eW4msWKn<BKo zcALme$(b1s@2@-bo|_>jb|2ey{W!<>7o8_gpWMyu?*H(p;^d8{MgnDg-==2G?(k_p zsc==xspB2Py@^46#=-xa8UFA*&}R7ftYhJ>-|3&rTH0G)$<F`!>vTa$;WfMYA7AA# zI7B6Fx;Sx1()~T3%qHa>fB4(Ee&LIIJI=$_0tEk$yY)DJ-xs!RHQ%|_Y>ZDousQna 
z$HTM!3=F6H-#ILvR>#ngcv&ss+k(!j(&WR+j1IdUmg;1S*LMF{A))i*W3f-gw?Kvi zm*w>iXe-Md0>#~t=W@=QzpX7j$H~um^pJg6;)5lI3=Nt8tHq3OeLcR(+|KR(N96?p z8VQM+oreAX$7lKPKRENR`-S2s32%Rj_s=`@c7un_ms6WJY}&T(bh&eQM1e*f?;rMW zetirL{qpsayTktXEx!FN;m6_0+E>j4XY%O_2hVH~D0AY_R!+U<#4-7g45wm_i)8s> zQKyiynM;Kmt{vm*PTHXN@y^00#mUblEb}y53idqypusSE$&}MerY!fmBI?A^ZuF~A zYwez87W=a8uj$3D`aX4P+{URZA1W@i*!Y`)%};9aWDceS%G_~G41YGKy0_ly+b7ue z!1(#a^fv~%N*oMx*}ply&ihyO^|5j?1B2ezJEyPyTW2c$@KvDr<9Txy+NNcuwzfat z&!D;VpvsNao&0P*GHYw&?k{I#^ObDxSbcq~@?-{scPAwUoD!a|;@wrd?w@Q!nxcQI z?T0Tuxk?-vVOh)$ITayGZ>uR?PBijx<5c{`p~$pQ@?e*k)1RP{nkIpy^#YIF84MYM zS_RHewRj({cqCYL-gZH!I~q&nvlWkIF}!3@Y8BAuQWR<t;O#yY&ZTH4<}`;@K);1U z(5XbHux^&Xp~Xi&UvCwNmwa(t^nevZZ>zvV*Z6r(9pPQSZm>KsYk1xwu(wrUA%hdc z5^lvZvAOYg!j=d;id1y5Xoyfe;_t-qdy!GK;*t3bN{lgrPIrWyc5o@~WBy>%@Vxt% zT+|c;J*Ej!U*y=i6mN-aU93@+>(8mUh+&TQ=h<P*dv?7se&fG)-S^P_Q%)aP%E`Zz zk0C&7>W|OuSAQhm)XMq!PCO@W7gPJc8=S39jkmWeX<hmD*5Iti{QTQtRjF;;c4_+c zd@BBYD#u#emN_!>WyYmlt@rLVA6?vNxcw0Cm)m0H|9qd`-loc+p!aCM%9_Uf<Pym{ z;{Wq*FOD^wJAHC@{kAs_yXVDjc^=}{oVH7Q&+M(&_o%--xpHbADCbS@JgK1AV%;Fi zV8{Mp?-V0vj*EL%+~7J;!0?_agCT*VVXad~aL_4khBu5;WORcW3fc<w8D8)#P&Vm& zzMbI}qsBj%Vg>{011t>b3^hy-$`xI#HJ7eh@pjVux@`<!+ys_RS?Sav$uP}ZbL(LX zSLNz{rV5?|JPfNAUBBw?HB+OdfNg`^foz6*8dhf|54bSYGKw%*FeGR+yk(GMU%+-D zz-y(LQw8IRX+irUOb$sYFa5J&#rsMIH>MX{3VT-ce|7!L)%|;B$QA#!5`G)EZoj-! 
zCMNPI=Y^k#)dfn79M<1iI<LA_eBzQL>P!rCV!l3J`}o<S@--FJa~^%xy0dNdvRvoc zCA+Jxm<n`1dq2bayN}HOD?Xqy^zZ)sqf4I4AAV?GuBdK&T4gb4;wF9d=a_{T{+OE{ zT_meyB&*h(az=x}i=lhzlq`k`L2>$l^Y|ZdYo;<b*gAC>GpI4|2#8wj#1YMKbp9kS z%~YX=!V580d&5GWM>F)_DSYLq`E=r<Pe&R4u`j5N{Cl@`b<nA;s`K1A6<={E9%IPh zRuFbrZ+-4(oR?&*$mRPCZH|+9(j1F5tJVl4?Vn<_ZAx7_;}7Nr<%Ti_KE?|92L6T@ zLVrVYUUY5Ywv7AtsCMq@waXs7TPZo=oX_bC+NXVduUkL6dW4-pK&)o%nQeEwChgL3 z&Rw2)Yu)Ro3&W>Po06j-cJ{>l&AV5h``9Ra{J_n-qSoQL2S8!4?y7Xc^A`;Z`F%Bv z?UYtoR?M8y^O3D!qlKW!gSIJ~o@`yG!0>CnkCdwW+hY!s55zX4JrQKM!#Ksqgts;* z>-*vfLC2Uqbe8V#x*%yJ<n)DEfX6pz4!;8Lf7Oj`k9-=Rl`_l<JhGl)59@<%0!f_$ zhi=Hn9aUcaUc6S9VT1H8<=MXuM~37b-QKkNgqh~Kck4X7y&V10rX4JLzpR&m;UU|k z-O-MURmXqz$nv#Y`B`izl;-;M$@}f?i$^8-O6Ld^ezm%yHPtWjhNy3Sd0ZQ7!|drz zU;A2Fv&EiN{CE+UwSCjZV<+5~DLu(i@O%50{W-tv@eP^6iO(}?ey!THLEvN|r$?W6 zLsagY>&$|8{b%q@GE8KsnzpH8QR&5}my>Ki-cLOvk>R$jt5mm6Vd)=dua}WuLTxUU z2|Jwhz5Yw`#rpJ*%!)1gCetT+f4hDnpq}lCbMegIe&uo7XK|lh@uEBElq$oQ_nK)f zQ#J*7{p_ByiJhT7Lr=ZmD9D!SK%M5(7VCXpY2ts5tYx`hw)R5p4Td*Sf35ZATdWS3 zE7m^r!eeg!?rQ<7Lo7Cx_8<Jjc6Qc+-2QVG+luF(Tl-XRzD4(f;9tQuvU6W2e0rlk zVdkv1R{#J0&-0Xnyx#oH?LJp?NYGeK*;0>jcm3TZw#$OFHvT)E3aS>rhU*-^({<qE z+}~TyPfmLL@%%K|j0d+R-d4Z-f4$;C=2_K;{Ap*~qShX}aeTRS&Dx_)esd2;tS^&z zeU(qMkaOuZ)kVLV^@S%JS}&BW{q4x{G4<kOy`WQjU7va}#B(d!v3+1@2xF+z*1FA| z&?)eUpP`h&CfF_hKI09YmX5Zxdv~}m_%~E>D9&U3z~9iy;OC*)-5&RiX@+u31^)qs z3?c92>RuLx0&%CF<}{N_Hx5lJVEEf25Y52Tptex^dhN*;fyoO!KRGSf`6rw8!L*65 zycu+aoLroYgI@6@ESd7@_@^(S(SLX6>ifO3<Y3S^aaqMb+;9HzN6+J~UYq&*t@^FC zfzNbfJ8m|sKiv86v0=g?jUS~s-EY45#O#-7DgG~WINQFiZ&~gaaUUC-_mzMC-cGBr z$Tl;p+bpB^_0GlgGX^$4CY9BS%=?$X_^fws<MUs=hdX8lFVN9Avd&cc;tE$|m$rLX zvOixmf2Q!}l<Awlx!g^DbDf_z)+)3dSo%0!K&*!M@3H<1=eExNdQ3*<SF$>&DJuTY zmUF$DoyFJhiY#+^4oq0dzo7KGU)5XlFWFt?%WRu3_S><rKew{@@H_s*$rDFjpWc5V zx`T<q_x2W--EpykB}NRrQNI?}zRxTFyjEe6RKrOHNtLB%M4fnA(il(5Ww;%?<~X^6 zaaOC$^aY+$n;Cni2epY$m2FAW-y3@2Om#K;f$pVWN<BC}s+fFciDvr2GmAxMsmoUV z1(7Zv*%}-eg1x5HGMzotz1MZ4UV|E^@s;p-0!daTC+F%nurG}KTK#;QSv*4p|A9h= 
z=Zt%J5^6d>B{LLx+4Qpn>?vMvY{I#YzoDDq-h`kyO)c(LwT*gx&%+g6>`dODS7Ipq z@!obCLmz{N$(N`ssr>uLKFD8)w&q`NlG*9mvE$eMGfj_nS5C1BD_XNX(Az#|N0EsA z-rW+<`Q?vql>MEMoZs%IJL_R*`SXR>S17Xm{+Ri$`|5MPgv;CQl^@;W6?yX1IdaAC zw=WEn1oqwBEoi@gyF~Q9->kef(vMqP^ABG-B~*Pa@7m2M)(dMi;`WsCf|`?kZ+}RG zs_RroMyc&ted6}j{^`cge%bME`1UBV=KHJP7WF?YCM7+7czc^@+@CkLKJ<IXI)b zPoia7tX0zGN%I8?-?II&+HzF;eoar^V|S@qdv-sm?gf(>CF1)mr~2ugyY>C{r4x27 zU!_eO{x{sK>dwe{`xev(I{8l7*JYXFj#aD61e{`6Kg^l(X#?l2DuDyuL8tyr<@32b zk7?F?*U*cf%^7W!mgdBMsZnlt9CRw*_oDpNeu1P1%>L*9eEvFrUy<(aZ+~6AR&M|6 zRhqZm=I!x2JjX@$znfw7oAb=?yo<3%KZl&3$gx+TMb>coSFxE_QXH3`d-!&xe_m&I zPvwO>8wF+xyR)sdDqt&Enc-R#o$-v>PxG(HqOR4eTGna_AMDx^KkwJ^@^^=)UVd)) zY9f1fsFH`v|3o7X(L#|~mW678TmHwVXXpuvGjU7|ns&$kaqyJleR6_R&#&*^cNA3T zKFLv-b2s+mjp+Zz<sL36YCdc)*Li9_UNG}^+9@;Pz$Cr9vZ>;}H^l>eQ<BqH&+NZ` zXM4=osjRF81w}`k*V}Y|^gnxL=Uh3#t9qch-mHdQ_vS^ERb=mcZU6DzAFZCYj(6&J z(m}&Ws0%TZuVk5ipSNh1cUIo=<vOe70)3CCOz7y@S*iUZYwe8>v$B?-$Tq(c`Eq7D zc)*~LkyB7oWoggK>j%2OyDpQ`{&0VC=1s|&>JV9wRv{;sqpmKqt$7w{6@I*tX=<|l zs>-UX;L(efoSymhpN`B)DL(F)lC?YRf82!Wb2s~Y&CGrm0UfCbjR@HAKkdTRy}PgH zoG~xgaalG=W_pAC{XZ&pnr8nZi;p{n$QnqhEDBnxree3d<lk?>Y;pawQ)6O$zV8I} zy-U_#^wi|^?Dx>zd`6Z(uvFp4>Ai@Avom?&?|y+px2`UJH~nj^l+XO%Z<oK*{d4>9 zv2)k%BnnRLj#}$hQJ_(_?$(5l-+$TXEjgm<Gbw1+n}4rdzFVs`n@$io6zVmz>bT+F zlmL!I#siLCGYk8jm;9b0uqb*j+r0H2o$JiXww?PbRl%9~Zb66F#EG-ozATx-)VkEw zBP{CJvpHwhh)$BPak1L5)#t2<JIBUAFUh1ePJv24gd3*6i;XDRKRIX~s{u1Zi@-EP z^TT%;4dj=+(&?Vw-05{ZWkcF7h6fu2k{YIbdNsGzwe!JyhL|N&EWg`WtrA=^#f#zN zGtl~6t#)pYtYys%cNpggIBn6244A@^$7H~Iz|Z)In~RIfRgTFAr98Z5aw;%1a5qd= z6>{Qcn9bOuS+(yt&qL67y6GWS71JpKNp=nVj5iogFic=mU~@=kC}ViEWJ=t#ZcqwZ z#oWhpEoL`kg!5<Hb<7Jm8`fP=Y0_f2eMU?YG@2>x!<Ma~nQGAR=yr!_{rudne`nb@ zJf6qy3bLMW^1)fT)+UP^$~Q3;ypRJeQg*p202XXuVm|I)><t>GWSYTqt>nLhF;_$o zUxRL1M{7q%$0|j&X44r{jHC~|wC0O$jy@$IC>SWH)@*uTEW?etTh&w3`K%~t_%l@B zhb?>G-5>d)hTEqoDk&++`mkm1SQL2R$fD(~52hJ`#%q}-AH0>AeR2Ptptg>VjuHdO zHL}~p|4N_HO$AL<sGR;3BINYLp;&t}--SEpdfw?Dz5m=v@rcr9H>az<HVq66jX|e$ 
zKx?*LzR#NwB&8>?Xn*hC%k5gJ3-_<uaNy+G)tPyZ%l58)JLk@+sSZJ3g=%uNXO+n? zSgVH@w>w?+NV>9h&x_UR6KBs%O3r;Qu{KQeyXdp$dJ}@wrq`MY3SMQYI=MjL(78*e zo)vTcJo7*@Kk8Shq}HC<zYm`}cF#v~l4i@C8;#fY9-nE=vuc@9@FM}?Dn9jpF|+Dl zKbjbm>Ub|X$ZMUAy3#7v63|k7e{JdAzH+^{ugv{-j&1iY-r2JaO_Yq<KmV>fdTsrD z{Ru&Nx@XSUMtRLld^e+`W5uhC$&QLG+gqo8-TJV<{>g><TQ!O-*QW_w4FjzN{r7ie zrQ!AS?=?@Ho|*El0<7NHPFwo7=|AVR2;Le0q=X$8MXl@GZJt)U>+ZEnr_!bfJaQ~P z?(E_c>dwgE)Nwrbjm+<!>X*{9H~A?nx^?j0N#0<wHJi3>*%YbR^1-0o#>FMXRK$tH zoLl?#d%3FYwR3xGWkLF{dfeV8d|}u53TbWW-%H`k<Mq|e^-DjmI}Dmz33dJ}KPzAP z>Z(9+R#?@><Z#_Bx2ov9gObsS)7zOtRynaNx+KGv#&0Wrb9;rae>8Y${N$xub?@w* z^_1a7(z}R`juT%*f|kCzw7@kTGC8z(o9$Xfmv9irW!Z8KofD;{A2U+kS#+$BD*pUw z>$Xjsa+BU!fb{KHs317C-=Lhq#U(^^1`lZ1pW-F*pd!7aBMY{|e#X+B%qz5RW_I5A z@lu(g;h}$BPn+)US(b}dO`ljJJ@e$ofQYDtdCc|^H*d_gUQ(U))>=4l#{BMg$@ATI z|C_ve%li3ok{wre6zqMR9RG(c_qU&cr~K}^8T~&tD44vqkzii7H*JzxXGh1okmSh+ z4>iTK%=;_Na3TGZjIn!g>5`DgSGLI)6umZ5abd~s=T>}haf$9PdlUJ8&gl+`3xBV* z+whEe&aH3Fan)wu{0csQYMmV(Tq5yBVAfCmORFbKREnOCm*ilOFnP?o`2G9)+V_s! 
zysOQ~z-@NtlBeI*EK$3;mloVvU;N}U_r=mHkC$HCDYrqMwen`hz0!!Cx7V)tvP5FZ zG4~g;`X|mBMal;%E*Gr(DIqiG^{h3%N=l2Yuii-6U<O+M{>uEA(wj?xZw<Y*r8RVP z0w+xG{jotIC+>^Hq($ozj|9)ZSD2@^Dd_RnTW7p_-HXofG8}j*d-~#@*9A6Tu0>hw zs>pb~BGl{c&Yw$F>??0d-l_`ny6dFL`z_xmvCePj>wDbm|CPo5-l3o`{l2frB-zBp z#YOaZ$_G2urBbW!ODy+gxy?VLsV?~WLdWiN0TB^OhM%<7&AZe3clG^tucbO$*GA6I zX<l0^%is{_$0l^Bxv=Q<cMt!bV_zF;Z*2O(X|t^$ed=DZ`+dw8(obnGz4kHd+Qu_w z%l7}azM`dSGkcqHXoic6i>ch?gTJr5e0y53>}5QI!+OrmhX0=CD;}wT-Do4Vz0v)# zS;p;rr6i~GQ*VV$E~`z5KOXygkMH}BFKl0`{`nCUe`jAIW4QlY(2|yn%hfeg_lTwQ z%TGSDIs0z*8qP(EN=mC@eP;2bEj^}e_lN6}@ZyJ$E^^5E-g*%er{lR)ry*puqKMdr z4O@&N+#@HjGXz9LIWF8_a4$cKX_3=8hlLv)CV#6<2sSwtDyNmo_~=m6gozT(ch-dL zF#ph;ez~mt>ao3QOOH(C?Pg^7=9ed7{absk%LbM2Y;^_s56Z9CM%=baxU#wX<LTAP z3=D?X&xJ-BMFh6~v<wgvdm+1h)5hkdQ_se3ojQMV<BsU*ptbM2cxNBJy#23jbi-nu z6NleQi%$@fZ){t;>+sptFKu5=@95w#2sEmA`Xc_uzAvdu)ZJdoglp(r*!A6lr}~d` z#jkDGHKYHXa&U>W>^yfkJ@J)v_Lo~Bn%p10aac1Pm~uT#D#ix1#QCedO^ft%z87C^ zranoQ_;cd+(ql5hReb;deCtfR*w|Q^AGc4|;OpA0A3nVe$g)0~$M&yz&l?_whUNLy zJF3mKuU8%gEhM?K#)QEEvc#Wzap_?_xr=&nFHF89Mnpv24~>8N^zHNYwQ~d{gH&rn zjvez)SA22b{!3qfC1jgOOzx6nt;v@Uxo=&(=4ea6g7;Ix4L;3ub^9M-*s3fomVEzI z`GO#^tp2OnSGfG^{*_Pt>gnPV60~%Z<JPFwv)yve5B}X}V7OM+_aWyN-?^_+N}5_C zoq^X2p56%ivE|*u={#X!X+=8I_8T#HUc9c@!US4%@$9O|#f8rc%D(HL@blOD`sc%f z$2*F?=rcITmgny<&tOw8&reyds_-j{At^uh&?fP!q*K}27YjdZF?xJ++Y{BdS@#U} zYdY)r&+T1(eRK9rn<Fvy{0tHwuc%9yJoa5&s=RK_6}!CK`Hy117MpW%7j5ldyXNhx zHS@cQK4+~mo^J5&TB^rNz3)E{s&7kvtlqC+ug*Lz%<RLb*DT`v@+vNIt~C(?eR=;t zYb`&VF>hb3qWI_a>gNSj$BP(d%|F<-c9){9BG*Q9{iLIN7FKB=0B!qnT56Z;m;2js zHb<G%Cj+&!(R|sv|A&P>OI@CP{u8T!Q;41yGvC_ZBAyqoCuXKLOUmb5oT<Mp`K64^ z|IfFB&TrpV>hOKz`32{fOb__q$I5OwQ}f5)$C4bjo5gSMo7rntE7vg9_p4{yqrBKl zc4h0H2dDmRiT#tm<;>#3*IKWaFLJa0zVqDYa(Bx`3i`Wao<B=jmT-E}@rm;%H?ryO zO88|h9k4yIIJfddr|gXn7fW1XEE_N1u6KFwsXx=?+ldFkw_G;7UUA&;=T*^L-ha)R z{~w%Xeti1xtpb9Ap>C~{53bq5@+ST#AHxFI?SeDDeY4tYwT<uG42$pXBF~<`ni}es z<|T9PO%x-;3)7FTds~?w$3-<=XsGq**^{-UH*fKLm+cq*rF1rz7acyb(?03r7PS*6 
zeGYzYsLi<iobmGB2Cq|Iz8CD)J$@IeTC3vHl5m~>Gh@a#oiA@qHhBGFY~W_U_W0Ao zvu>w%_th~8$3>=HvirWiM%;3y=7x>yn<M4(TZ{L$Sv_4_-GBH%d;IZzQ{O*$uwH)A z?HS8`ez704&$ee{xU%v#!{lfG*BC$FJoA4-?tReCzO{!=KA2kn|H`>S>2F8cmtHgd zy5iQ?hQGIts?K#kV-&Ic^@`&g(|<N`eSa9W-|)?xn2EDzcAa^0!|3()x7LhyYj-g+ zyfV+MzW#UXJ6peM&AXd>D!X>h)MPN3{?xng2-B>;%>H?Q<KE9XcUazW?yffTO1po@ zh2AE0{+Z*W;uh<CqVw&o%ADHefB1WEec7{HyhMCSh;!tO{4IN))K~?7E#LN{zw=%4 zyi49J-KCO0CBCh#VZZs(6SVT@{j+~;ukY$^_3F%C{+*#A&AEWNd-1NG8lK<bdspwb zR~8hU*coae>C?l<x?pE=tN-eEYTPSqE(s|yW~lv?_TIDSuhrDmYkQVRu*l!<+tT8( zMBPo<UOhN^na%|jjnuRDEGD1-XeGZ$em{M8-p3}vsf|;&R7Pgr`2AVB>iF!kH~A)# zHa}jwv-L;cys`NHm7NkstFzo!M3u;QF6`fzzU9uvs~@_)bb0l5Ur?KW@9f`+vuAQ~ za(b-v+_$8qrDajZ(%oK>9ZznX`%dFortX&g+)Qe=_v@JTif>-;nEpDwzoTP?k=7g@ zHTUdy2Od1De82smNB4D==5OcTxow|QSh6-gOS<*v&KuiO%$UBfZ}wU_KV)i%k>eV^ zH?MXuURbl^+*z;Yxr@8Eow;<(uiDevSd}kOarx<-^{M?AZ?OuQmGu>@yf3kCUhl<K z*CVbJeqYxVzTou1oq1FHZ0>1lzMA!|@Z;arrzc5Cr)~Pz{eK3d*MG~>lFuoib!4II z4xLPhE3;xaaO~l|_s7E{Og8S@BDyX$cGf>o$xwHHy?|5Ww!dnNV)uI%eEZP2wt4fm zbFZ!+S5W@BEwjl?@%pdct^J#W9+ysXWPiR3v`>m*ma9PaJ+12e1MT<zgr;A9xhW)N zYIbRq<m~B_7uLSHduM-4wa~5EAO0O^t^9vCYW<e^mtSqk=<MjoTE|uIw7b&W`Nq=4 z9=+V=i5CAgPnhnTbm^w^<|Z?1+lv`<-<Uj3-Ds}==+C}*#*A%^FS8$nWdwOW5jlHy zb)HuolYh^ywkkW__nH60w*AjNrgQb!_M*Vmp$sO|Kff;%ez7W~i2u0f=Q^ju(_-Ie z{#SjjG3{INtk|T5pe|CVVa@HXB8_`(%E~XxTJ;u--RHi3q3t|_m#<N*jTGaBm{+=S zM~aqPn;+2AR14WWe}3yy(<<MJg9&rbOo@1_H1*Z7^``^gK0Z*J{l4<Qs`%O|-{eH= z^WVojNy__L6P2;u+DmKJ?<mP>VP>~pa#>2AOubgV^1jD!3tP=&39?(gI?r7B;dR{2 z#U*4KmwNNj;yp`39v_MKe*f#wql<#cp10?gw`iPPeY1XbZgYRWam4da6LsCrpZB}m zvm+wvw#43Q_XF>yx}7(F@@Ac=#hyArzV-1fFIK0&e-*=EvAuljw?8L@W_w9Lx_5xT z_x3k722d7Rv!&X)DdqhWwrcgWdyZbxTfAJ|g|Xq>zuK#yt+8dBU+w;DC%tIHh8FLO zOMh?hc0au)KRMpLdZqo7BW2B{PJZv!Y^(1P5DYvU^e|<^%>M~nURkm+e0YCMUqHC( zT*BTdH=?b!cWZ~$?7#COJAI<S!s^wV@2oMA`Jdc*ai8?|*NbBh*8J&Lt(yJ$-afeu z6HgX~{zalEep=3Cv)=>S>G<UP`LBJ`pS^gte&hG&<{Q30Hy0?|`KIN8pZdR;Rkr`1 zwK}I>lmGfAi9>Ngkl2Iim4_$H6j~nsJn!AH!tGHd@|+ADzrORk#M&+nmn%wQKcp|u 
z%&_H+;i^-QR;+l$yE=c4eaGb6J?|4|%%0k2Y+bLndGDjC#}cRi(=83z6>)#%EbA+G zEXo7&C-!er@UMQKsid@MRoKeNzn~gmc4Pa5*)zLRH&-uy-~8YC@wUAWV%958O3qWT z`Xb}^@2hRwnJV|^m(&*AlrQ+ds(fLPSkloyi4wnFR2O{y<{A{D0jh!d^yEA=Q(2!c zaKCqGeZFnO?d!}Nll~cenfYbYv$B%wOdIN-?%uFvoAECDx%tN#3%-4Dta1{$w)ek6 zo82!tsqL?O@2oLtiMMNW-e1clU9BDw5jFMorumkj^?=Q3dphguezR=cuJ>^F^IZ#l z=d)i}k*gnZ+e*PshvWFRn#F75i-MDbB|iTV=dj!<{L@l0!Z2TASAQiJZ**z1S0rfd zr^W7U)+<?}p!LBQc>b=<)6#x(DKO<uVQ1*tAf^j{3=2Nazh8PPszjR6T0Q)2@qRt$ zi|hWUu>AeazOB|+`num+w|2+i!1K#hxwt=p_9jg{xV+%ufvSj`Hx~ZBvD~{i{ll5< z1y5hhKjG(nB%S~FqQ4F7TZ^OrTJgJmUU#_kRk{3U5x&3QG&co3-k9{SfT#KoGicY} zOU^)xow<^Wocs<SySMVz+7xxrhU1`<3wD<qpNSOg@a>j!elm5ko4fIw?rlz*EBkhv zvz@KsW)Iun-?K#G&(EjEw~V&iq}apO0_fl8tN1cG_eaH~<rnwe{vy}kSzLc)o2tIj zr&FGDy;t|?$<6rpj_uE<R~*gHZ{4s?eDyke!?(GeC%k%(U&}9f&U?#x=fjEo{|_AV zmtVQ<qyhs^{ioy`H>?x?zxqCLo<#5D=>6Ak7u>qI)$Z7y4@*yO&nqnYe&|d2wa4f6 z=Q+B4%9XkMXKLKsDmz`nuV>@8N=MDUG?BNv@aq-5=F~>^Yl<Hl-pLlmMaea=`Nfv> z`PQd1^R18XsJ~X%vwC@%WbXO1x3{0E*faZg@~NiK8{3|^iabesc<A8QTSna6-g{Fn zoH{0VzNVIA^?ixvDK`w>{P|aJ_4IN4FR|%-`Iq|E2?+{@PW5{KJ#c#>x6S@95_&Rs z8fIiOcJS|S`f?=0;`-;t<@wbm(tnp+{C$hB_KR)B&$0tc56_+;AlFmX8wJ`}n_-{8 zl_MLwMOsk$_M7d;cccgI_l#zd{VN=(n4XZB*ttxW{XqQ<AB$a88RyMS($~)XuW%;5 zn%BMVzsyE+Jq24sslIh`8MhYNSM}6`+SiNDKe0S<*2fjJ*0)B!%6HD%*uOV-_^vP7 zTW+;J_0Q%L9+%GS&%U%Te&=~<gIY!_zL|ZWe_wkP_CPU3<@ljv_c-VI-afcFlD$OE z&O80}%p~1iyzRah5A}7aWv{L3JNReo^@5_?Mn_uaxd)YKh=@&F@%*w-@2zi6-wpk< zzw7Ur9H6n~{`^NjtIHQ(zR><YOLW7+ze2h?YYkGa?FOyX2klw_EmH2_xOj%~%Lhoe zv7AXt>~7>Tome-UF0t5lWu+e~Ldxs2+?UNRGx+oOOU4~3eI4soW9xdik`t0kR9y}& zFXi9z%2aG^b#Lp|xypa6_#4wVUwZH1?BWt4$<z?DL6K+OJl`{*GYo=3saAP^{kDY) z`p**~gE6VMI#WO^&r0GGL6tX_Qv%S2KyV#05X`YKVD%JM(D=_P!>OAXvbJj7zf&k6 zIPs<Cnzu4}NAK-{4BB+>s#0?gjVQPXl?-WRJ->+^Ns_S~^y(f06xMgn{XdEcloN zr^p%gR(~#ClmU+u1y?zVeA;w5mw^SeyjKad-nZelxn7j$v*#dzRc(j#<(s#zb#9qr zbo<Umu*D2>gqf!QkNcRmaA&jPqE|~5%(<<npFOL4;%rn3Xsl^Pmd27PubTGEd8xj= zZhhaLqDyx^3Iv*ND!zHAY0sM;2F<5M;7|&6Zv4Nw{Qk$l(3kSZ?eDi-2o%`Jz@??s 
zrNq_6dNFk4>szmHm0pjl+x2bh`r6X#Tg$KOed}W7?&`f4;=(wUZ_yW_NzcC@w-nA( zGxlsa!1Anee_W+y`MaIz=k7?Jnf%=3z!|sc({%Q|*(u#O^ZDJQ=PVoMv(+CtaL<35 zi2j`Nn02=w%_+`1su^8!>&!pn@@HEb%ja%-`sd@~noA3cf1X-1<$TrM)c)9A`GqGR zxu(y!xqfHf<l=KXMcvc9!=_FQS-xPw2QE>On5&OhbaQ{Rd3o#5A*<5j8#dv=V&`W2 z?rrley2}%O+E?Cw*7JOuvV&6!mFu<REdEOv|JJ#lTXfEHd(u>Hw$t;bSczWE|KYTk z&G%N$rnm1(&F<`p4t#xf*QKJ%xf-jk8?c`(IsN*u`nuHZkM)-O1@4V8KYCSZbN%ci zKjhhr9=13Bu~~HgkD;^uTIDY<9zALm|NSQM`JB`IlFg0`o~{S%CT&}Ny`WdFyIVXj zIB3s?PA)F*@I2vIUkzPN%@9Y&l`8QGccyN<G^uG9uhjpnrAy;=qYSpoe^@#5r=RjU z%jxc$*qSA|y<ao&hQ$=$;kqRBg58EaX5!;*J8q@UN@ti`thfJa<g7z!W~r{GN7h<Q z`o_ldWAe=2(}l%ziqBbwcZ-C?9$CxA$IP;2!WFaazJ=#3pU31rI`|;VFDQ0_yNwRp zx3h^5rKc+lcKmwHxJ2#m6?VR}iP2#FU_omE4KJDZN++*R2Wgjia4^BnqUDott5;)e z(G{`hbBfPhHIn05cT6RhWs6t!F1`W>zFCjw6rbA}b)xy;)!TIknzlUe;<)JqQfbN0 z7GB)WJo6iW&Rg#@7SHDt^Vvwm<a(~+e>0~-YiI9+nwm~Ykiv%tAH2#jPbjR}!yv(+ z({;S}gK8ScN$u>-t2Z+@?rL%PV49#;py|!z6rXg=^7))%@9pLrqnFC~aD0nr?=#*U z#Nc`V`14nS(`NiMNY^a^dHh^uT7jubX7B>l3)~jh&UmTT>Svl8zvlfj^Fr$zh-)hS z(x$yO+aaa)GA4oj!4DU=g4~2@Gj+R;|7I|seba1b_R8XOmd}gM8aJ<Ae!o#Ts4=00 ze?#gkg)|QR$y@H7$^$v^c^3b9o@bn%)eO#ObE?3p{M^o4hmD(mCRZ=WTqpey6gIbx zHXr=6al$8Ve`}C;D-CA2?_+=VWS8JM%ja`e$J$7ok#`rKHX9V;bF?1|dwgnH5_TXz zZJWaNT##7I+lMm`{`tt4DAWmZP>kex9xISJPYuCV&-c8gdHrnx$kT5h%{&;|vt*aU zM%}0E>MNNI?wtucDS31fS0j_Lc3htoi^$S#PfiCd+t<Ik<ecSne;eZ+uOvC&?BTgS zmwC(Do6AD_mn@s~E%scN9)n!`Ijdcai-XuFRUNaP9en(3!%xEpW#=rPe`;cH{`uHA z!L2W@TKK_AnZ0F~{0|)Me9Ye9XPMOvN{HH5PL?u<v3TY->}L=@^QP15lqqW;!>w62 zr%s)s&Ge1A&t&ssEzRqK76K8{5=?s%(vB9cx%cis)Pd~>{s+ZQom9pkB_!5j$MA!x zVtrTM11C>)CfTH${zAC|1r{4*4OlDgoJnKIp78MVv@Mne0vDJhq;Bu=xU}=Y=L7Q} z|J~^oB>NyDLHj}a14HB4;;%EM@Bcpbv2?!CX3mYa#~FB4Vg=&AX&V35z4w1VL++!$ zRc2rJJua<h+{R$H>Pei!ON$5I#?s0w89$gmkTw3TnD+A=%R5%C$F&ZZ{vDWgAaRlv zW4y4~Gq#A}OMwoy4^HO%w)DPbcdvi>nVbaGx2xSxWZALq$xl<AI%V(V50-oOtSWnH z{d`XJ)-`4`9X+$Xtb8vmKcL{$aM9j*r;;bT=POr^jBo6}(pDC;dwypUZuI83xix2f zy^7+37cLxHWr{4V4C&098#f;Kcp$se`#a-1wmfz*hV4R3(@*_jzag!`bZ=5g&Z9f0 
z4~QNJadNP52oiRhaLr)t*><M;47ZP%y=Pe??SG$f4_^uUvSWtT^BL-UzbD5(_$;B| z6mg|WH-Y7XoP^d&*KOtsnYK<0JF3^bcRE$he4qV}6UU_WY<?@A)O}H5=@WSOTlRqG z#|ribI>ymHFVyz$np71uuQS!0{g_vjviDBI12GR&J@0$%in-J#c}i6H-RG|dSUk%; zZ<%?|t!-SKv@>1ht?>K3#h_w%#!ao?&L_UeaO~mu!}o=WkL^{O#%|jJenpR8Qv|M< zoDRrSb~^BZS+ToZbwi-(Eh9#Y-BM3y2{Co>KK{71C1HhvTsd>y+9$s*9#HOlT->nn zW8wJ<c`5aC^*nRr@7G`6vg;V5cuM8m)&oyF9}9CH^iO=WGw^_5<hu8BcgZ`5Csf_~ z`JZ9ETHoyB*YB`ou<EdF^O^IXvBK(fwk&^*ko<i&4z2xN$FDPgKYHAH$-7XSf1EdL z3-9n4JXmq0IG(XR?BmQXgTBxEe*FC<VZoX)X<<(FgIzx?`&jgtqf#}lE7fg_Klc9k z+ymEk{xg4W|L-vft-gPL)ji)mh9B4zHEte1vv1v!H?tjLou<9BG(LTex0-!n_LsB= zZpP6Op~^|qR;zvpoA%jVXqVS^^_0#oj+x8jlO}#YyT-&$j&IAfD~HZ3ORYS~e_8Io zdD{A2OrIN<JL_vRycXV7`dy7>-u)wHrs^sF#@j5b^gr&%{`3BH=dRaBcU61d>Jlh< zCpACbc;ED#T|&G1CP!ZSd0^TBn}k_2dp+j7_B~;f_DS$V!5gJ}%odU_WZ$2ZYjMcR z{NiUL$CtysXVTh&KYCx8e|hfWtYE1S(O{hCqy3w?pm)jb2I<Dx4P^&1GoLPF{O+-f z-$Hapw?W9l!$JGRpU){?7qq&>AR}$D>Q?oXwt17kuz%3`uxDnkzxKU}7p7L)6>MJe zoYV2)ge^tK1&>{j<?osPMeT#khl#23ucvZJ%w95o!WOZ_MTh1cFmMvflu}$h)5!j# zg{Vl_wbuI@Z#N1yoe@f~QGDS&i_u3d<#tfe>D3__#ZE<QVmLp<w(VMe?EgRR3UQ13 zXPzDV9Ub{SO;fR>+Uo7kLWgvx1ralLJn!8cnv$6FDB{rbZTn{OPQEhTfMtQUV(@m8 zOZNR<?ZHPDd=79ru%CC~>bi8}-%4v^1UH21Y}a3S`)|OZcLpy@^W;y<dY(A(PhOGv z9jH!R5n6X<!OUK>&8s=@c0XsFEp%*kt=3BSMR!V!cOO5qOK?}PIOF<b8{?L%p3*hg zEPnQl!=Wdw2M^eDChssxn{nn%xzqGdf|7pu?k|*NQxfmoifdVO^V1Fcw;v6RXY<$i zok%)svRQGZyp#E31BFWaG+{NZl&oU4`Nv-$UwYNnl+Dtb`Ta464T6f@Q$KNEP)U0i zWT)ICE;DUKwuOQ5zwLQzggw7<&OCeSiz3Ua`6_+u+1w&#I7~hnk+4_mex=x?TOL)M zKc)(F*fDG|+iX0!alsNN&+o!^?WZ~4@0~kSHZ5&^WYS${&*$F1CK}vKVY_hmOqt>4 z_A_t#Q*&1rpR=5PR%@=LPgiW|d|@@yIqoxN&MtgV|7O+i_9d%1g;lK5G|hQ`=r5Em zGd^_SUydJp-1HI#Md7W?il=X|uTWlTu6QBS`@}un{<cYMOp_b#ay(Wu+^ny1my2im z7rjK8Wb;ETQ{QUL|JY=Z_v!2owgh=6^KY7I-;#fE&${V7=^JC+v@cc$-3F15ZH#}1 zToP`1uuEU@hTN1LWwVxiZ`4<<<^MDHg6*xj6KZRFKg8bQevy*6?Bmu0zY7<g_iIVm zZB)zQ%#hcAT%L(vwa>UxZinlo#E&1_r%pM$#B<NH^;22q2pBDy+>n=MwyevF*J53l zB0s32?foe0krfbgpd$Q`%EW_dW*T<!E;CGHoKD1)vbfE@DPQP0P03hV_OzIQQNdrG 
z!+Fvhe;0@(zKLL8uzqH*ul8n5MG01kq_o4Td%6BxKEt+TYHYVaz&g>>d>!Y%Gwkz| zH<mokR;GHUMs7k@xy|)f1II=BiFX#9d9(b;&uJ}Z-Zso|^zq$uW6lKD-*?U|Q#qCI ze&Sr6-;1vNq_q1>?@x}{#;Eu~HYIKIqvG91e%n~et!K=c$FoJnRHek*Uxnr6-A4c5 zWUt2tEswrbznN2fZmph;#2Mj9yO{5XO*~ix^54A3r^k7AW!;{v`dSX$)?k$5c{cf_ z&P+#;dQ0{SgBkv60>^%W+G6LXeoQc^mpN3Vv-lCHwR3f{zrso3NoGvTX58Epv;8PY z&DQpGw$I12b#4ZMnmomPAI>ZQH)Y~JpGlZE;frrf_63l#bv@~9pIesj9jGv#J^A%E zP?K@pw-09${_WtnVR5||+QvDU@bAZj4Ij3EO_1bg`|QrWAP>2zb&+eu{MT+^-DM!R z?cwz~TdE9du7&TeFqmP#j^Wl*SmUcEbEb{rN=}3OXT+ZG(gnG0wXKcu4EtZq`(A); zIJL4YEg(p6gXo2&j(-=;?dsyXo!M%3_Nt1p_LLB=3auNV0a<6}*rq$$ComXXIFt5d zm+v{t=V5(X&5oY3(&nqapZLRiG*<eWzShe<sjrV(m)>0wc$;_D`Xy>s&%?IN-2B`) z;e<Z-ez{MdAAJ=5pK)V@<HajGKa0htru9GBl?yUz&6<M?mK<tg`nz|5!}a;apVqO8 z|NH9SD!nah+oajg=S1Ipb|&_O``@|;U%KZ<8<gKWwEo$-=#o7%d(W5d2F0#~WV55E zv`J3D_p@SWrSreNsa3yvf8}d8%ZypYdehU+#vQ2qUy*sScs*~-o{Wo+CZBzCG-mrr z%jYp?R}1iX1s!H9-EWn0f9>|#AAQNMOjg8CS-i~hc~-OeJ^6DFm)q}Jv13O{w6XNe z*WWCj&siNQnkMXRwcqvgv1q$b`ur?&iwiEqMSuKm{9PmG#)5aBC%t;I%l4e*^Q^F) z2?3XumA(?W^XboDb@Mgx=b0U!#}u4({VlpJZvV?;t*xx#Prxw(Qo14V@UHLkYlHfm zWOr>j8(2K2cx&L@MNV-QAD*-cg?nZ}ZMRH4xIjfzeEOS;PZ#gnsJ@u9dO`8=YdL!! 
z-r{bMExs%Ed`_{c`b^0?H+rg~b9U98lX^U7_1&^td5^0m?RvIL7UV8JBZ)g@o6BCG zSs#47d`0oO)N8r3%}S-sS8exRcl+U-;&soaxYo6@vc{IZ+g12C#_X77`0moXRnB6% zQF}^ZDp;FKUyJ4KxVJ0&$0osZJJ&?5zdxn5Tg?1isWqs&ka;f*YD&*Joh%1y%JZ|$ z1*PXtGuWF!iLkixOta(jIj03ZZ+m6UeQZ`TZHuGc?Eq;HtqE6rgTy^wd#<{+rqVN4 zrF7Mk8g<XytaVGzU9|xjl4t3$%i)sQ>589Q5A54G@7yH4H5=Pf6*A>Lk1w(P%m3hj z=+9kS=PpS*;D6w$1Al`3gL7GNbs7bcs?YViQ|GMSTB9lXd``5TJM-j*gQpALW**q{ zV@K_p*XyLR!qu!9avPotouA976VbSSr`8RAiKMi429GSBJ#%lXS#JMSk#@mydyvO2 zy|iWTCVlB$(yMAZsbupKeU-iHFBiD{Y)-!RHpX+l=PT9B$wz)_tG?BLS>_q<eJdu2 z+0)$PmHJC&&vx}w>vdnwinu;~(mL-s*;+5}Pue%>OWzWE)pK(dUW$0u_h0`P9CW(k z9gmYh`Nk#J9~n;KoBU<Zl7o}tre3i-oomT^!|>Yo@`TF~=d&lhI<|{%!k4s~{{NG% zXxPPzoGzXCWzLfAk3Wlh{|dOI;OXz3B|iVX8;4nW|8-l5+XY+~Y(LiRK5kYo``+{N zk)6L)-YUHm^py9u(!F|4Xa3J=lj0^^iNADs(zX8c@6}#@+p_7lXO%|bp6yGnKQ4cI zf6^?Kb9=Q`u3nO^Qaj=4*7g0lG8!*0=G^@_$&X<j%QoM;)ekZsym6U%Y*JqL{n_^> zeOv#;wtT{ut|j|czHctxI+wlq+|H?~=?@$eq!gu1c`Jl(@a$lz5UTJ9%G$lVtR?SC zE=S{~oCm_5ypMj$Ce?0kG;ZuX;O&s^RDS>PfqIp%Jx}!+%{tfD3VqORSmN{`d4Xhs z-3Ox&la};-+|_sB?E$F+OAjnNko)mY^no8gc3o%6*X-fn&0oP`p<;I~jeUXc2k8sk zGX!4m6w{bil2OyYp6NN0G;{yaZ0-ho=JMmeD^hFf7z+dolnhk2trYkn_k%@3==J|{ zo&B~O0{4i|ZM=8jK*VvekG9L+aNc3AVZ0;s{w3qgdoz}V9hiRL{KmK0Otq?_jE5VG zL+aK%l02fba>-wg`|ak9n;WLJFz7ygaHH>glC;rJW{Y-#g6Tr<^3~)SYB^@!&3=47 z;m~)}r?Q7^cWG>UT<B??QgObw!M}0k#@M@@k?zm4Oy8@Oom=Zud-jbQgXeAL*hbzn zIcYWPlnbmrMC#-pc$pTaw9;NN<CpP=8k_%9XU58}zsvPQc*DY(zWwL7_J6H);CQr; zwa#cW>wy&!$L$X6`dBK)7=K*nmS#)*za!cI1Ygf#-=puDW@abXZ@gLY<Da|(Ite{9 zdzp`gmIj~6S-7O|z@8A>jqAJ{qtCwCSW(0GVe=Bx$V*#)#~WAM1zxwE<ihbJj`<&3 zPFC7|?^!%oy!W}y+T7rO=FPDmRm+cFe?4c%v!5>2^OSokYgC<MmmjMx@YJbRGGM-O z`^>$rnP(n1&Q5Bmx7d)q=UdPNKjUhZxN0+lz)MzXW~S3B51jcIe$sZYG221eM)AhU zs;1_f?_WCQJIRDIW5y4j?Y?uTZ!z8=x?%pzvyA%!W*8J%eF(jET8?MRtVk_qn~o22 zOV@>cPPkyX-Kcg_#U9B379QS>+(#Zi>@<p>70Kzq!?9l_<@lzn;S4+;SHjtU7~1XQ z`yp-+dg=9nyJ^?vUpTv4?Su29UAqprckbHnVe?NigV98hrS|17p{5Orlj>AbH0B#@ zZk0S0w$!#@<&yv2Hs;+1@AO5_)J@vsaA`jW<2w<jwpAh#{SzeDcu6}=6Jap^?RrVV 
z$<0)hXW9MkcPZNxy_>4e<m_9LpA;)#RA0_uF4(kUV`oFLvD5UQk)6MS-dl_E)CJv? zp7=%LLhpp!xr|c;-W|TgmOh*DYolQ1{mYJ$r}9|7c+1YXx#$PTucu!peE2QD+@Sbc z&92<rX4idJ3Yy>jYgfDf*TmdyIcLv?%Y=MowValf|GaPQ&Znnt-H!`bo+K<<r(E)J zTfgK^pG(JYZB=K9y7?-aAtx{G^JVR>#$$3Dvp<PEXtrp3YwjdoTy4Br=dH%sPc^S% zBBvyzEj@5wHJ0^_D@O%a!*A`C%uZq^ck>T?ooSRX>DAG8ZlUwJY=2m9oH+CBD9@uU zF26VJ-T&nE)Vsy4og!-^g&G{6epkw5TX@<obUH_4=&E()jqS<huQ^*D7~JfupMCT2 z)GPB#FLf`O%Mp3(+sFP6vjc%DcFUI<%)RjR&bc#U;elmrqWd??a=gE~GfI2?ah`vl z?cW#PvGfm%75MSy_56tZ*V?PT7N542JZ`+cz}c<u`Ck8hk7p~|KY#Lddf2MHf7{nd zy)89eC$G3+hl1!04Ic*2w0~w3r$kky#wRd8_^b1^)|o?3e_DurT(G~lT0jYt!RC0* z$3M9^ZtmshynAH1*DB%DW`SR?w@I^1S?j5usqE={CTHQyy`B>F+rvy>XNn)6yL1oJ z9QDk@E`OuLg4~Ur4unhHKfSX>C+w5ywvNZ&j4GvXxL-<}#Kn<pSl|CXF>^0($@D9# za`N6g?#nYDIq*vI&?W8NM~`dj3m+5N=4%^x^ta!28I8W{SCxBMW5eb%wjS6SWwdg6 z;;ru{Pni>nk8*kxo=LyA{^5(P{d>;w==HiD?Vf3rTekJoxtY6l_Bx(>DsDVm^JT&V z!3SKP_JX@U>M<WXl9bOJ?zu}oLf+!^nPX`y^VcnLPO266mXck+v+Y7?O+1gDnxI<q zA7#^jI);Id(>xzHZsmOZeAVaaryd{3ExdE{f5XHBv6Jf9q=eLFgjlY=a;B~;`9s^$ zh7G!jM$W+x+&%9fJ8sWdowD+=wx>MPegUKO_e}OGT{SKe%fC-vax!gOZItMFrrUzM zUa1Q+oqT*IN6!1?8M~<^%!xgBm)!U^`D{+nq8pPB1U%?H65+rRd;BbC?DzYEO(#63 z%)b&iv%F!y(5?>ehUF2*Hdd#7tNs&b%M&BJ`|Nd9pDd}#VPz$6F4f0hVmVZCv3%Oo z(z($yr@Ru)ys_Zj=IL|xJ~zA`x24Ljrt0zEhc@5tg#Y>d>u|<y#~;dne-!^eKArja zr^n*g84R+&F|FJASa#OBiw)U&x3=H;dbw`4^ZtL{|L*=b$+HrV|F6ZME_buH`FrFh zhQDdE*BpK}d)@X$_k&p7|GhP=+G{!E?(Xf&&v!px_vLzg)Zg|$XIf4gXI|W6Xq`X( zWq(fiZJ&8o-CwQaJ7?;8mq&+9x~9tFw}08LM=G&AIk{=akI!6bY*hL`ZDqdfg2FZH zEk4#1C-@pmPky>9tVQQS%aw?usWtYcoXOABX5PG$x>h|f{%^$n6ywt-eujJhL^*7n z8TwxTM%Q(76&9PvS}(2E=f!^Ih&}sw+P8VfvTrj42Tkso_UV_{fjg<3X`8QXmrMNf z;K!+5J;?_9m(BED(|Nr#&-L`|GY{@mcK=8^AhGSnS2sQTa(z9^>hnd<=U1jOfV^*< ze)jpM;;OJ6`&>UCd&;!0@o=4P^`56`ucuo-UzNW5T<#6M>%9eAn`Zj1eDtSf>6(14 zr^k0Lo%g@j%gDw1x7E}un>$p6!?PCcW1VvTLuhE-v7&wVY!s5&$|PS!d7Mbod%g8) z-F?Y*^S7<Jp0+Z-=;~6ZX;KmI?3w07M}AV1uzB`CPiDjOX?M!*v)?H?&u(+`b+(lG zn&W38xo>>E@c+X4{}O&SN!9E2r_@KSy}soY*Yu#mLeApn?S`MS%~tlSoauRZ-#_Ka 
zijyT7+x4RJw%NJ5wq9M8eWP6XXX2TVZFjSn&2Fd6U7B~ksr6{h@53qkq-|TnUaviQ zO*Z;s&e=A})6=F*F|sbJHO~o{zH921!e?uKN(3z5&~U8x)l}>4D|WP0e%3xGtg$NC zV{u#L^|h=Ewp^KEs@@}>e`$qWYir_;*Zz-}eLg#3e&+6jQ8{~EwWZ8f?3s7vx8%>m zDsFwV&V<QET`JuC)#Hp%$=2i7Ti*FSpHr+IaF3;#!B^_~<2G-;2Wy0@3oe$f|8OTe z(Wk`IQ`6agPfy?Mwfhgn{qFtVUi{-_Ka)Z3k+WZ_iVec=|Kq!yenwOK^1{2j_WZi> zvLs`B-nA;Z+IQaP_HGjYEI#FH%|^%5X?uR3nRzwq_?nyC7Oy{vSN!|xIAzL|`63k` z?B-UyS@&3rrM27J>*n7NyCrRsY;NaYZaW{p+aUSEuT$2}mT$JWGk_FX{9N%Vdz0PY zqI$o-`_r{z!sq6_4Yc=7?2i4<d0_eaIp;xX>bC0n&##Z%KYeP=<}%^9nhzpncQ?jf ztE#I1qkDYbSC#$eV=iTH3A^03=g*1Enm<P~*6sfsdo8}4SB>vq`qH<B+Vf`ChLvhQ zpHqBV`+b_QxcKx7uh$-|xlkT*vzWR4@2Oz@uh07<uS)f8xihaal_5R;Vs{&F*Nyy- z)~{P(YXSQEw%oT!`L^$WAy>6*t^U!YGuyAm{9a#f@pe)A%>4^yh@F)#b~vy5a$9b+ zn*VIq^NZg<eExCjcOKr~c}&soD~-?ZF>1IJl>AyZs_+Nn+?(r)rMtSgD%XBD*!Hqg zTUhOX;JaN_rLR9ca$bI6NuljN$v?*7cRr=H?mF)OtNF5Cl~}`L{`>o!6gV#IiFDp} zZ&Bz?|I4OQAsWH5@q99;A1=2))zo_Q&FAI1FYatKoixL$)~s5-Sby@e6uF?UXSA6u z9<;FktbJx9Wwzq<I_qtV)BE{E^z|Mtm@Jp_;X}M}9B3A0?YU2C#u;gQJ{(h6x96#@ zn%}&WqR-zR-SA$`pyEiyy4@A^di(x`%GE8C51+T+JbnJ1=Bm>0J8O5o*cHC>uRMG4 zMakXMZOebx*6w(6?D~(=NAGQZzhdyS*%ZAqyH3h{uJr%Eat!R>?o5i~pC5JYnB(F4 z)b)$sKb?K>rn=p<X;SL(A9x<u&2?P;b=Idhwd$9W+f^mEC@LQ>{&152^XDIue;!}k zUioI-<5RKs*X44{?Ry;2XZN`9-`4}~Z!Ye&{rxAid(*CdYwMKTX7@{M-|c_8q9^yq z=X-O1e7@&>ENq$D9-XzP^Q$KBm)&ptfA2?*S64Xe&+qwpkmFy?C1;(OEi?YxeDZ89 z<$iuP{z&WfJwBeEns3eH+WF&c+IgeQm=8T!aJPTQY;${wjaPC(Gf`osm6Ij+JbGqs z`Rt7Pjhf4)l0Q$1{&{?;c166t&eqa93uAxodmk!O{-P|V=<#crl1I&<SGLvg-6%d? 
z)TnU4HT{j>>}!eBJ}-|ClPRzJTz`N0k|k5zcwd8jK9xIc``xT&E%DsCM@QF7n*0e@ z`S`_FeF`@Vqg8p%*}dknoChZF-S_ObymsGn`&SEcEg!eMn<1$_@4w09<!wAMSs!1p zeeJIJcrbXC%>V6s%x<U1zT5lq#3ly;r-nP5kEb!z{JQ#r>$=UC68mL;#dbZJzw`T% zO%4Kwo-9!RSNn%6ck^nGOTk>X_pP0#BCGPUFKgP`^{Ul+`mbNVRd{LW8Sfe8eM!AA ztTgnxY3KEnnCgH_-zLqAnzt=<`jYP|xgpJ8O?%$!ZhiNpcT(SqmA|s1V#GhM-m+CK zd(}I2)wdCcOU1mR7Izsf_dK<&e#>fAP{I7T<=vidKVI(ny5IQ5j<dH!&#m9-#{6#I z$AVw`f0wV@l`voQ*0jrgYk$1oxWFN2cWA_HlfARWG)_m!ZmWOJC2Y*lXSKq6>11`e znpedPWp_8q?tZ%^^O(`^_cteQy`JThx$@)DhBLyP?{~cm0EOR`mz#f{w*0?1{=Z>N zd3uD~>v^;LQcm+}=iNy?rRo0O!_o05r+n;ozSp1n=0|+|xA$6gMC8nnolErQnnf&3 zc8hgOj@+9bv(EWO;bN<QjDONC<KG7!vTAY{-j*lL9<nu=twnB8UdsKsi#0cFn&$n2 zOD8;F&aw#~?gj0CttKRPaot*%jQ3qHX5H%g5P5CMx)9sjyG;{k+l6;O^))Y?eJy)x z>OHN^XQ!<VuZT8cUCi)$t?<7Cvuw8hxs<>yCT3h(Rgt%vPuDo^_L4$dv&`LQhxeb} zv*zlG`G>TmZ<pSia6kRrVV#n>?eeGo-LCq(%xAt~`}||6aT6ET?Ny4ac_FgvO|5pV zoxJP*f5{Phc1+nXvrTrnOl;wt2VEO3ww*P$+y8G@Hj_bR)KSCcl^>@>=U>#y-Wy$- zaq>jq<g5j)>t@YU{8MyJ<K2RbGdJ*mIIzXFbG1l$$Bm{9b0hv=kt#2IyK9}p-XDM7 z{HRfUDE=%U!TZpmEAdC)p1H<m#Weru+h;-9|8t#gd@heZ^s6{t^}6Yq3(HEAZb}|t zFxKvkKDL(IWbzfsh>W!RM?dj4=xuzLZ?x7>;{JKQ1J7Dm*UvsSzh1a3#dE4Q(>BF7 zZAJ&qEDK*e_q0iwQeO9UK8eh<$3o}xeQw-Z%l9y@eqQGJG*91#Yb$>X%-2?6S$aS1 z);@!Ey*rE*7f4M=t(?2v^Eb1~yMWD>iTckOJI`<Ddsw$tc7FV-)GU=;*BiAd8)o`? 
z+3c5mSiN8Ep<L~>%)du>gguE$`<DD@XI0DFw{59W{|)P=?-CUAmgg=p*qoS|8YQ^x z%x|6AwMTZmJ~!=vf7-X&TRTlTzn9xxm|q}o&f4|pyV!%>tG+$xn|UPUg8h{<$BsXr z&6qv=<_njbyISfG9~PQ_I;_RiynXgf{mxCYoZb>^xl6is|KE{yYIcIbWKcqW{WZHz za^|Wn8TTw6e{MCr9U2r>H|@Q?@U%aQyYsr$B45Xrzg6Qo|KZP+%*`_;)sNq)i<i&a zzutdpuFP-mZ%2Qb-(A7Vf3J+^_`I(y++t#v>mHX@Jh--a^_tDgLMs1zo%(OTX~~i) zA16C)GY~CE54vf&-DwX)4A-9NbxL~geyb&lcLg5ko788v`Ce$~sttY@4!8Wxt7a~3 zJb2*gM($6shyMRgW7JiZO-Z{i(6r>>f#jA43v>-!E-4?_&+%H!Z1NT5b>`bOKWKgs zO>BI9w&CKWZ>P4L*tjH^lQC+4MA@1pwH%Jm^jaQ#iE>&{5cX&3cPqsOznO1@T)LMw z?cezt*#|{V0wunWe%cGyW&T^WVeZ}8E%65)H>{h)XS&(nXiKsA174?un_MquKMhM) zd8v5l--+5rd(T}b&#dcS{=V~lYWksfve6z^TF%y84UCh_{Qn$!?|CD3S#H~Pn<)kP z`;~GED>P@8XYQQ4Veg&#<bt0n1u74%FADq++Tj-Deduo%R}b5+qh)EgxeRYkvub(! zrA(E@cejVt{WC$2`{Pt<&X=jyT$A>?b^XjG=J#74+h$CSu=%5ZKr$^XvGbY)Pwk0G zdsnnGwG027;9)sygZsCN1#6f5J)4uU=bx5S+ppvW?Mt4Yd2^-W9n*t774@%}KQ~Nn zyxi#4@?Q6V<;Hh8-<JtK<nPhv{GV3V_x+jYp}lfCE4R*Ex~sg8MbB`v-$(w%(>WTS zTyJ@Q%{A>?fX?mq4Y$4h6C@sfF1|YD`oRaW*JWo-z8-r^_3fnW&!#Kp@73Pn`1j21 z-euRGGiU7oXtn05&gZHo&(PGsda3OyD-9m^SN^dpzQ-l6J!8|B@9URbImLA3iN)cP zsdJ09@2+imw|xJMnVBcc3a{SO{~_@pVS%rfvT*%J<~wdbTnl~9{P@egqyGc9T58M8 zxmqg?6B}Rca@Za5FE^pyX}V6!-iG_8oBtn+FNtSP7jEh^GJIUbzhUuA-p;o#+2`b? 
zefO&3%t%bD_l@~x@F7KO!lV+`0`-T}V<nA$hhEZbxf{mkx95J!wwHbnUWfdh<fp*G zIy-$w{eRXx<IU2^-#$O)Ni~{OqL9e{sP}A6(HRNeNneVTZe+6OWTkbh#4^jU*G&30 zFU_M$(t>w~mEGU)2N!jA&zQp;lar=@tSsdDe<5|IY9?uwZK^)C{&}h?3jO`(<sE<g z3thhZvx$@2?zJ=jDrVL{Q1x7|F19wVTlN0R_J+eArw`4#$v^3r@Vj)a2{yJLE$*H@ zu)5{Fe9PbMm)G*<2*rP|bJ}1Ll6e2!>xOe_W-*t3wde}O8*bj-$lY-GY>vYo`L6$Z z`iyb@weMFY#?}12vhQNG;I@LAOJ|OGz4HE{{O%pk`|tlb>SeYWZ9e+3f9ZOay;Dvw znKPNK$|;wa9vR2pyt?a2N_$9op;~bL?8zU0_L^n*1qLb}J*pa0^Gt3|-5tNvSLgZf zlQ_HeKi6ZH>yf)TgO{Iw_QF{1`Q+*6*H2{(y}QWiT6{XMn%~UGm;WwZy<2^2t%xmW z<i^jB-+xtFuyqOVnKi3S8@?&9WTeIxuhdgpU|o=?w4j1-he7Uvf5jhXSx#zW)>l1M z?|I^d%`A=k+SBH1znojKFUEl5ivr8E-AB(zx2$_Kx3T*2eTO$*ui5s9Z#ELxwYJ>n zuhP0la!X!w+Du%rn(=~Snd`KxqPv;4tFWlgxXI+XUce~m>PF)W=AvJY9Qba1sMep= zT3>Zn<$e{G`7`hMb1Yh?&cgVc@p-!DZ`A_lH*IwXH@%6Uo96#u?UN79N4}@%KX`iN z?<(#sosqj%m92cqw`cx|ysv5{Yx9mz%DKv&d4FY&@eMhJlv??ogXf<ed7sW^E%d*F zspDWt)!vuutFvnM2!C+adYzLtS*6zL$Gv^Br|lG8dOmPnopgLbP7=dSO}56n9QO~G zs;(7f-o?)GJLtab?#sIwR^FUgE%<Ggox0#_eT9;z%XjUL``}P;^UA*ach)sMJ1aHS zmv5$p>hEuJgw;alSTHI+-s&l{{Kw%LOQp&}4^GM4Z24@Bx`w9a(--#T4=;zGTb|gw z>&t8VHf`fI_7TxbjOL!~N!w*;pKkv=Y?o={lN}q99w;5szq-mqt>^!#C5#KDbycq} zo#r!L{r^OrGX-<rlxed~`meEk`PHeD{?#$qDU?i2pCq+;!k#pv0@0i5RVgC7b)^Mw z%`=kUG2zoq=Ojyw{i*ja^-Rt7*un03S?KGs>)RVXec#+OvsUu86@vxm4+YbGI?gw> z8;v>czGqgk4eQ-zru6%J3h$}=>s?#Ugp}^9=k>VLEK&3Kn9&{P2c`+Np0@=zrJRrY zws+lYrjtDm%icI_yRLivSw_~Pv&R|Dh1!gJCY9t)GYiq;`~QPs2Fo3%7t{Rq+O-@g z=Jm7|zW1JcTZ;dyFXvevupRnwVoK_*Zx<@l3Z`Dn?4DL*S|GE2zj=?ezU_iTKi9MO zYD<exKiL;+_UO0#@(+hzH}81&=<lknxzW3>R5eXrelF>;S@!1#x$c|YCjYuq`}pCU zvWuJV)YZ#Zy}8i$_xqb2S4!7s*p_90$8!Sr%l}9{^gidts>q$6AJ@&DSjx8bbmzmC zPLcm{ZQ1RDb(OJm9^TPND4&)hRK*m0HfPonvC|)-Tw8K(w152Jf1q6QRIh@MTE#u1 z_xaY$H!KQm8Vz2TZklO3YhlgSqk3E4H}0JIE?s8Fe4|c*JMY{izOyMR2glwQHi}%i zOIp|{%v?WpbFiEOOXX~VU0R!sHupF7Cm%3$y!5ms?_^G!@UB1WPZi{`oc{OMyY@Ik z$K#OO8+j%do>>!dX>m)>i@K9l#u7e1{(bE}?)`aZFVnW^R}2jAZB<uf@JaQFXO5e^ zg;_DV*7jlb?F`LLSKXQC&$=nhFsaXTUeDd@{LIgV?qz?PewSmxNuxTobzJi#V_ah@ 
ze$8i)`1p4TSGV%?DN~9L@7I<OufKcWnc>IlBlkhwzsNm1Op<rDc8h(!;(dS0?RC0O z+jdu7e*QU7L{t<sr@7H>@(1^)E04c95y+m~J+tso<$rDShfBBZ_^`fyk>g(_FE204 z@2BE)Vzw+%PyF~ONN>-x)M@*kpS>#LX{)NbIbk~Ua<3|nOY>FdD!u&Z^3!mV-?^Z> zwhqpo@?KV9mpmu=^*qmGI5(|i^Ac}P$5Wr)*jmW%@VpesCG1<q^3F?NMBQVL^AFXk zg@>eCu6uf|z3@-&LhvO~F5y(3AJuDq^Les+SVdo2d-}#X@5aw6cg5p>n|O*In|Jj3 z-nkci%l9p*=8P>(Rk0O5zBBcc%7%zb)|32{QX1QsLjCrfc$De1A$GdKNo8eEe{U<( zzVo~17_8pCYTcQ7W?hkMW;RxJ$AebCjc1#q_wwM8@9TALuh`J`*i<r`_wik`iC^{{ zTC&i*!Q9A4VCA}Y&c_Mvvu{rDIu*+tvuZ=#^UEy_cAx*h(0=91p=RInKklAtO7Hwh zS0XNPPwJaqB9N$bQaUY6D*c}y^Sg;9ZA(_ArPMLJ(R=CRIbHBr)w2Kn-vbXBw00II zg`X;QDEB<?Y2|dO@MGaI#}AX(Ja-us+W202cy*6_<#FTYt0pN=D)fFomrgytIZc1& zl#eGi$A5U3?!9fh^|J?hqGbY+aqaROcx7+xJ^bC??_tMf`KPnGzb!q^_jbm&+cI+1 z@4WB*D^STcSsA}B)>C>*&N0xqLFo2<N16Zsp0l34_i>N#F3?cY72Q>b`yVeeOnFi< zuWF?|s2ZBiczycq#XHVFU%cbrx)1*!&DOJ?@YZU&{PF(}a@{|FeNfo*yOhm+{zr@X z`-<vz{_U=pG4Ew`uGcL0&5D1hc{%O#pV!Q%=iXlBd|E2@d;VQ9-yKih*zSB6|Nhgl z--mDJ-M?*_v3_kv!2XWw_odHWnE3eS-s)<*Uwc-mZjRxtV(`$N{ZUvkfZ1u9dYXbC zQ=ihAT|Rj^f8`J9uje|GvusIj!)A_|^ED3yf4mbt>6`MOqFlZ?lOmdu^d5@)eVTrG z@s#slzAB$N<~!3_V*1x`C-G}~+fp<qI8KQ__9VgS*e3SG&MAp4H&4vC$>F(MXp^Gu zqvC!qDMlH`87>BG7IucsH#fdb-L5EizNd7rZsznG*A{ac+s*GyeSUnC>c%(rs=f*= z!p@w@zskI?>=leW_VZ}r<XDcGb8R_<rQUg_^v+W}6Xf@E=BI<7*Q)F?I-9d?$$HM4 zOHMJ(Hs|=hl`~PSSnEK%#Qo^3o%7>9ni_7tFQT?%4zG;Ii?({Bd9N35aQN(*^6y{n znMs|xFTW=g{rNZ7Q^Nk2!c+CyJN`f4oq4Du*}ThQ+x9p0Ptv|^ULt?Ou23cPU9hLM z;Je=yP7Jlqi%vAR%)F?n%-N_WHqGLf@WZ$_jx#t)YyQ}9b2`S9AKKNMe{}v7S<c7$ zw<lUI`_XdaZcduAsG6j#?~ghC3O+kDqm53UQS*|T`LFKm8~0Yp%k`>%qJQ;H*y~q% z_rBu~p2_daGamggdvKfQZE-`;e>Z8f6~Eu8roXQ|vsL`<^v7yf{c7#xkIr+y7rb^; z_`Ln)t?TYgzb|*_;qvzS_m|!7oVk@98YB1gj&^n7`Ny>{cijBd^W#Ss*Yo<hKdgVt z|93dHm1Vs}zP?-kT(|Y+?3=5<fBRDT@lH?f@A&okJ0856Zk+Zn=vvmT3GaW!9kt4C zPt8&DpX>U3-gbl3TRGo;UKZQ)`HQ5+;gr4W+4_>FJHJZ;m0MQH<?nX<ny{((yX{BG z^!SfS*RK1N|GoeHxxBbu<e%TWFHXLhbp6Bs@BZ<Y`w#JHF?_fGDVZMoPid0I+=BAf z&kW^v?zemY|7o>!>C^ubfA8Nt{`v8amofFvcW;T?ueqOZkK*I6Kl46aiClC4r`MSr 
z&F3ZS>mD`yvr7wmswcm?dq(R^-Zx>c1!2MKuXk>k@a@^QDX;&m)!;Hd{C~5EamMuT zU%x4_<jS7Wc|F6m^`ryunVhC2-&$@SQ9k?TP0sZzZXf#>r8k__l9Yd$H6xvEb>?H< z>(6F&e_I=NQ|_)7n^yFe`1Nmo<vjlS?e=2jv)8kZSe-Rque!tWblR8dzyF=m@;_L( z_0M#h?kBzBdY-Jsuk*1lzCKIXma$(>I)6rGV0MFW-;H_mY^^?=(f$h>w%+$Q(de_d z$mYb=X<M>yH!)Y`oy@b&i^>c8@Ide}Xi$rz@LA_wW40y+>ypgV#oy<i=?D}SOnSEF z=bD{v65@MepYyJ+{jcz5%A_lcDj#opy;-+<bL?}onBt53R!sW%Reb(V_VzcsP5=Jt zPkge%Gw1F`(cSNE^o6}PeY$6L9Mgi*(^L0<TT;Bb$ztA`<EJWgTXjEtQ(0piuFd4g zG1JN^Evt3%7sbS&Gm%qSn*zBX=(f&$@s-oDdfCSbTc;h$`Qvw>QjTL?%sj~wJ*zKQ z+$+A`T_{uS5k7C{_am$HPdLw$EIGIHYQMiiA>WVMXEt_!bl&W{vh*^WWnORbInV+T zgBPEU6|d)gGRyj&=I!McslR$G=M>xvJ~MCg1l}mK$7h7gPX=<&Ek3t%>iOe5yY?7X zzS3E@|8s2Kj@9KiB~D7#pLx3bVsONU4Kv($Ul-j!>z;P^+Ih?8bE0J{4PJZ#N5XTx z*&mNKZ2V(W=N8$vp>T24o6VQS?tDJ4d}B%Ox0B}QET7+bb)wmEx;dx`#2;tTK50_W zGN1Wpx7>erVNaxT)cW_E+vgUad#caR7JE-qM`yv)UD=n4?wS_g6?s0VSnqia6J!;> z9BAqsB+_4L@B*~9Z2EB<i4xG-ozuyw2Om6|bjd#Ld}PMXoSx!!WxtcJ)o^ihcNO2| zdp@Vwbh(a%j-H<6ho=o2_Ze;})?3|pxNd1;^RC535P9{RzBU;zwhO<SRatbwa{5j4 z=HpY(#BX@<IcED|(7G&7wq|8<JsrJ2g?GNpF$Jwas{QuW|FgKh-d`*5R)Fy0Q%#P> z+Ibfiyj#I<_t$Rv+ro2KgD$6^Nj$Q`^K0Y3oq65G=XTBt&NKTsXD7G*-Y3Vdi~G&9 z(hdi!tKa?PCu=Dech&5hC%_B%V$N!c^US&#y>#i)^>aSepFeXxw0TbP*7M%^ybs^< zyX(!`zu?CdsLE9v&OP|UdNjTEPX3oY$LIam5(k-SB)t60lPlfxB6k#Ad=z{(Cu-g8 z$8(C;rG;{^oqf~K%H7>>@p+T@v$<s#Hy_>nT<47I?ik+L*7<?!W<LmX)Zb^=!MFR) zjtxted?_@p*1Z0<@SJ7%XQOuxNon%>QCoKW<zCPKYti3x*Y$sVFpdw)2`zu#EWdHZ z4wdU?3s!ez?mzqRL9kq0#U)n%|8~{qnhrld6I+qC?)DSVmZX5t*IZ&^B6Gr)`yH&0 zHpvgVm3!Na)w=BFin{53(wi=}oed8Xj`h9qdq(xnXN&&cSozv*XI|Ip*0gi;&NnRF z|KaR|`QE?2%5Q$~TYmYK{DA#0wptd8&&x~O|9n?1X!YJ4Ezs^D-^RC%>w2ZHzK)B^ ziWS$3x|7qo{oK_VlO|oVn&idJ6*BAfwjH;YZ&;8J`1e=ozS-9K7t>W$m;P=1mu6;l z{Viw*)1y#riIUZ3p39l}nI@lIlL`*Nb31P-Kc3;Z{gx$<z=m~8c#H2!faWx=%E|Gp zd!}-gwXC<jyP>w|h74E~v~%a*E_J!<?0jcKok3|f{5NPPkTrvbkIZ|q&8?u+TP(-( ztkJn~(UC&Urw|d)Dz$aTRJJmjc~tM>+fa{X$IpjC@T#?L8;O{`vRo!-KKyChcciEI zoaOTa%?GbeTT<oFe<b+iT=!>>AzN?w_}RkKcW+W*h+*hq>3UqKyl3?d&;rm3gB@1Z 
z4F+}vj2GBvu*Mvp)No<?4A9JjOli?whV6%BoH)MOGw~U2PGay}-0-}iszo);?o=eG z#IT%RJWn!)*Yg%j2JaqrnVhtvliIvqC8p^!9v9d(p%%Q;Nbmf~GevjV!Vjy=b)BJ5 z6rrSigX_oHGh#;`A8uTIW=-+Cs8^sBtTCU@Jb0DKSWuxQ(38hzwS+IJ?L(C3>$Pvd zdym$ER;13GP{Q$H=}g~qrI5{2)fH(5^*JujSXY3S@qvnhGxFIFHXjG`jx`_r^HI`3 z8F>Y(1$)M9_!?DCl}e{ew+`qSM^8as_4-5Rh9+Fk%!4+9-xcF>(w2fVuO(#XQGa>k z+v2+%;ONzxCmD0}2v0nC^G{~=19s!z$w#;39Gw)xA+qG{s!dx;IW$(~+*z{Z*Gk>r zAb-|8n0YYtyhp<G)E)0M7NqiSUmB%iI(5tYqOfAd4?l{oUF4ORYBk}CvELd`S##!R z#~W^2KA&@%TaM>h|NBNu%c{4&4a<wZ-14+%6j$$)XRvy@0<?fOX6=TkcOHr>c|V9J zG|jXP^xV1ZfS^OP@$OBNid+xW9hjI_7Nh#qiiw>mo7uTR9MpmHJMiuRV*8Imj<7kq z1q0sgKirL6ligKv`D9qOFti;r)1OqL_rd1F^NRX)lS0@n#BOl?kgPa!=GoDzTgSfd zE3CP@{7jBh&Ur%zW>4`W(`!$isq6m!U*BPN#r}D|d*4^wjc?Fx=-1jQ?qSup;r%BG zPu_<8jPGaM<TpHBu2!HQ;Bljq)$=!VHdDLjE0!8xjv0*Crj&>T@K;<qvn;*x?%T(I zD^8tZ3%j(hs%6cy-~+$Y!h$?+T64}nd^&B~>IZi%pU22W<{C9mx;CX`))N0l#}<ag z^=(D2llUfG=@z)~hX2dipyzy(-m$7NO&2Kpc;eeHcOj-Ozm^3_;!Xh^1!e})65KNU zd#3Fuk$;f)pe%v0KwGh3<(c}1y^Tr_bUlx&o!iS3!|0>R(!#7_s^}w^IqTXp=f?bo z+@#s%OxJpszh{VJ-^CpKc*b7;hW*EWZ`A)#$s@qnr^d2dx#8P^08iabdl@}n3o;$J zn_Md%v`_kn-2*YF2{on#PO9G*t1A|?6x@H$;MaBBnjwAW&7?z%Zij~l`JXc0_~zMx z2+!;kO>?#x+<R1BMte^8zGdLNGwD$6KDikRFZDccr>)$qztFqBX$gI{0P*MWu9+Bi z-Qq!$&ic&)J8TMTloWqx`q-FrSty<Ux;OYc^9v>)f9wD953U|5{QP6qY=*TBM-NQv z{A|zQpI*84aijmE$G15gpN4<@QT1TokBasyd%KR?GwxRJo6X23GqvKK_Ja#rcEN(j z3h#;UkC$g&&fse_C6_-&-2V8)1E~(Hd7efcIsfhSkH0&FZ&+Rb|Lo84dJszg`aAOK z?oM;IdkQS7zh~UMK6Og1@Cxq}mu%D1ZthBauTau^kLONC8u#ifmor6MS?--G`sd*k z{_<OkPN1OT=P%wmpZ4iS{?E6#5EaC2JbT(xRePW6Klf+seZ_T#eIDQ5H4JO~UtF`5 z*)in;Z|{<4EpIZ9nYq7Ll`HT4UuzjyzBi&=-!odU>0IpN&)LVmPx1|Sx?X7HZ6$w0 zXa>8F@n*r!;=P<RkNem$$@Rq^sb)}T-uu}0X0w2YZtuNcZIQ09wE&&R4>#sEhHnz> zyT7!~!QRuHv7e!zN%g?w%+z{@brVaZ3zQ3lFYsCDuDxUOK+f|!<8r2Q!8Wauh*RGm z%_%+?nl+V=?QG7LCEr@kTqvIOk6|9`JoC*TGmlye?J_#go;U4_jiQFkq_B4uha`4A zZu~gu9?LvtyG?JB_XaU0PyQl2>6(&H$PHl$w-ZXf$|<J7&(!6XHyt{)>x1-CTQjE# zkxV`6DbkbEX57@zJil~W_{s~<TAoDg6ne0mr_{gq|6lP$-NT>H=H%_!KeaS}no!e@ 
z<(KElvzMr`bhUO?-?NBz6+5QZr&^QudiG5N&st7rhFwBNrQXVaKd&gUaOJV$S7rHg zTVU6#a<k3)mrjW?Rv#_;Ewt+{KkL1d0nfi}yfd+-=gogXqX(ef6TMMSkMqpBnPI$p z)`h(lavL-g_uN`OsY>OPuleTcGi#(5T9*ob$dY+o8nx&SlX3OMGkhvOaS}X#CL6SA zC%2tEQ>R{{DL?Vym$DYUOO9g4*1mN;F-y1q?`1iD<IO)?bf0fyO`COd!WDjlz)RmJ zm3eQeG)r9Mw9G91ljQ|wiRoeAuSh(3!?Q=4`Jh{iPS`1ZhsiT`djIS(h%1O#wC>@7 zGe7R{+UTcpRlR588XtM51;0EW{m4paY2`GT*~5Bm^{2V%iYt4UOmB$iY<$7&kZo*j zEXm=eHKoLU;qLE6hpO|_)7od=%$^qUJ>b&MNp&hdW-}Q}%s0oLd7~KZdcC9MoMrg# zwU=fdTyi7yyJcd~;mVeoJ0;ow9bdPr-ecD+1II<l(~2%P{1@7lXs*oj=KQmg2c@3u z0!D$>0-Jhj8>}W(2{X;z*7D}=rwolu1q03-y#{$g#=o5|tvyiS`CjOlVVSZ|z)j{K zx@+g{XUacj%J5rY*G7NC&GMJyFPw>+w1xk}<|Wq~BU?^NJ)e1V`J{OYEZ1u}4}M#4 zWb0*>z5F$rFRLHCZ+Um|UCW!-*)8uM#W|>R8~3hfz1J^XTc*Tvd+JMP38sQ-n|}fo z8ijLCem<Z(iLblge$A2`HQz_3^=nlgv`kQ(%Ti*z*)a4<=7GXco=VWJl&qfj4nfV2 zn2zlhQtRX~+wA!9r_!M{4~3kbtx`6)b>?4(_a>2<H(x|-YnagZI$g5tcE{cJ#T?1U ztr%+0=Hx`p^yNILaX`RWT5fl4zV}wmz%pGYhn-<M)(Up_nSWT-o=K6BzA@F{bEU(o zyB}l353zLaYbk6t+B{$Ad9;wxcIG5DL(`8*#ZF>xij8Ma(%3JnX#LxAq4vJcnYGjF z{F837wHzp)F!OBJ`L{yvKR>D1!?vN)=)$Bc6E5i5P83*D&Ti-1tJ!;4pzHGcjm76I zx9|FpVBqg@t6!kSckQMPpjO<cV_NoU7ea4eOL=Kl4oc$xJ|%#6?CAAZ8qBb76H==M zZMT7J1)9UK<s59g%@3AIHi|1nKg^z~djh$s^d<3RkukWr*^mz1`7jH-_vgII+bOTh zK!&X=`v}?`w8hlGY^!xU$Q3dbX$JFu<RpPwJE0E_Cj9$Q3f3Fmh}bl7@Xy0)VbC_~ zbxiXeCaFVvYepYIn}~#OC?T3tjv!NZ+B}<i&_?lPwdd}q;O!rCR>!WID>)^Ep@d=A z6-}KR`{MR4;$0$o%PhS4%$jvejyhznyZtC@{?Tsl=Q<G)HUbh1JZ76Mcjit0{r}rJ z>piQoULJn0qFA!TOI%=rn(HbL;gwyBZiupOEDsbEUAH&sZ@`hVw+&5AmqbKZSy`5R zR|;D4olmpn=f4@Hht8d}^b9bF*?jN$$AfP|zrES|{pR!Xv};k1KUUcM=b72%D7Z4h zp#I%=VIltQ9i0<z%u1i)m2vIImpxOpMf(4?=X|NKQJ-O3Ej+`j`0ByP&;NUF?)@I{ zj_K#4|98q2HcnqvH}&qeN*lTU=SL<jl<4W~+-V!W?#A`}eakZM%r)QAu{v(4%Epa$ zo8El<GyCR-N^^U^ILGtH+tt<N{Uo>lcX;HxYM1ivx1TC(<j!|0E|L(Lc!~AXbGfh; z4)fZY@4uS&{7&(zwB}5?es$}c8(%Y&+t`>Cera*#<m5aV8(NxEcKgZ43Y&S4{jCf& zlqTut|C!>Kn`Qdz#s77ur`pHYn9Y3lvBHn7t#{Vp((g61X3Uur6dpP~eY>q(|MAC` z`<8V7>^d>mmFxNH?}hdHEVgp|F*kSGeZO0hm$Prr$M}wp`Py$lp=5Na!%@)ER!{HQ 
zRK@(#^V9X$i#N&jAFuc}k^8aNG>r&dHMgqN?YdAy1)G0<KFYu6=dI$T=y|cKyFo^% zEAD*!_{XESa+S$RPb393r{4Wm0SdDp6Ww^!<n1o)IUed;lser|uK)Ryr5__jHNtm! z2o;o--F^i2p8F~Zk%?ZKn(m*jt&B9Ye*3Y)M&0nNROl&IlQVhNHgf&Pi~n#`vAqso z8}caO-H#nP#>XCioO8z{H$Zcr!@oPan{M;<AAkH&#y;q1f=`c0#<dq;?%d4mIsVvk zXJ*8AlSz|<&z*vq<00C-^YLS?Y1+Xz&WnGkunAlluuh}Xvz<F_yDZp;bwBPDffyev zY%C@8*}!4>v7*Qzj;9LblE)vPB<zv+0`lJR$0rX~7{2%jW>0SZm~;V@=;Zp<*&iQW za2(9`=R4Nz2y$D6%{-YN@n!-B7j2P<*;d4D{A|t1JY$=AGVL8c0uwL)TUh*5Y+_CQ z;#zOP%KsI&N;WAeZFNa;VSC!@>e6?(@>Yk3zF?)BOP;{Qq{dB`U7wVFkLMT66ti01 z@ucuLck9>ico)5%qIFtIpWfcz`m;hoX|d~)sIuD+KekL>eDp!_-{Y6RzVrVVbh~}N ztmn#IwHl$@`Fr&%o?1jd`}pT|Cd-qb?dPrv|M-x<MW1U<<(wSLo0=SRL{~bvBsIo5 z+}wX_XNQRRM9u>a3rr^P#4xN8nRsj6t;hq~4z>$y4a8<>C>1&|KFD3*5^-Ie;k&D4 zuHAxz@mqgd1k0b8f7h+T=HZdl?Tr6d*BBO=MQ?k{oN#(utxn{|nX}TToJw15GjB4t z;K|<FmD3r-R=k=Y_$rin6{B_I{SZ^l05u2M1xy>9LZ`Ey@?)Kqpp?37m0H`^XaQka zSC=Jn6V|Nij*VpWnWDzLImA?Pa&~9=bFYH0rU7E&JI>u)wac{m;|eaFgDsAB3A0wI zvA>#9e0}zdb(#4Q^2?dGHhw)26KX58<67Fgs!Lmn<8ItKP!&2?{#v}DIiI)juDUn> z7_JCSd~_fx^zPGND;VeW-F;jt`Q@Y50*AW~D!NZd9$4tG_wK5y-P)@;)-YKy7#nRg z^zQJ`O_;qZuT*KfUI5pO#j9pBe|^Qs-FV8uHNhx=GotP8e&$uXOxENdJhsbbW`Fzf z;-Cr1M;@$rFaGJ_^UFW?FV|n3^mCEm>o<3&9}r$&<NeU7vhvZv&x^mA=6|gEcV`24 zL(p=uq|ev(_WY__DBdjR&+z=^PpQugeoS#f6a7~0vRV_D|3D}~_1VEr!HLBorPGBi zJ{uZtkXxavWWMTEjo?fEEzD_&_ja>7%r`JzwZm#TgMZSZjqM#**xi_K8#t;d742KK zuBXUyIm7;4-+UVim5QdWTGcV7q;~27maCRZMQc~NaruT7{q<q#N>obj?bs50^{>(; z!OvZ1kGdqY3TCcf#eFxzx8$4N(IX$(4=4!k6s-`M(YN>ddj`Go+<8oLEOkt?I!;uC zzI}E|iupd{>bnwtokf2cw(qWgf1KsxYo$-zA2cr<-I}Yb)W7nT_rqS-B^noaGlcZ^ zgqFIpJ&*10t^UaKDuq+9lkpex)yC6vR>-ziT8lHDbzL%hRbL5Hz1|1G3o_ET-yX2= znIgvKCq8lJs;bV{_Vxx$!k>*de%uirYX58ZwrU&oNixc8{4zd|u1Bt3ar0HA+}`E3 zd*5fxURLv=Bl1htuk3GcY>z0HZ$G_Z{lv#&OaY6tG_AI+JNWv&Z_1M&tTVTLt|*dt z&-A#lI@FXkXn)V)m+~U~N=AG;L|?GSaOwnIJ=a)NSj>>mcxv^lz6&2e^o5qL>!{(H zvvgHBd)Gb2wpFi~&l#H>nfrc|#`PfP7fV*%xAMRBeg=aS<DQ1jt7?8SaZG!(m6o&a zZD-_SvS0lw`<+!C>pRwY>}spBvOJ<(ekCfsj}px6=sfAQu#Wk9jeJ4#s(&&zdP-*v 
z6eA)-w_BWlq9yop#j3{**Bkjmwr=lu!elfny4=2izhLvKb>)BKIZK$dR=p|{%;fSB zn<yyQQX2aG{iFk5r!qQy{=cpD-O}AHhh}i>KYDfMw8q?-N=lUt2M;(tU<&mWo4CF4 ztb=j+dn+dS?h`W{F7I_P4Udx-{yTrgtMELV@(vHy&{%Gpmd>w5t7biV_5aPW<n#A8 z-hXU4@ri_C#aUI}9UI=x++*^AJx=BW^ZXjiW~QG-o73}|*6O`Ie`V(C7j>@5^WNR< z+_aP{{l@hZAGsM`+}On?d(PC*BKcg@ww&AUCxuuy=L9ERN(}q6ajNS6d^NjpovGSl z&(CC~c66S+_2<o!CnDMVlY|Qk7k*pX*?YD|E5j+t$U7`2?dAJujp$b*udZo%PYm8< zcf9WCs<msPcF#Q<{XK9=LENb+Hm$q1o)Vm|ZRY*9K7Ma+X!iEanWy8p<_l%TPrLIm zvRY;P+Z*Q(EtS5xrBwgw>M2W}NiIDxTRnW*t#>!8IaohEJ2|y#UugDr<NA|q-{d`( zB>ha96T4q&=cWqXd0#B}Pu_GmKS6+nqt)bahw#L62X;oR*r*h6dFok~IMIoX2f}AW z1U*>Lp~KeJbwc35>JJ53Y}=l)Nj-WtW2gSx#2F4f$K-r&ELvqfr+uBM8k_UyTkndc zbojV^!x^r*yv(2R`t<@~xwQs{Gjx=m@7}ngm$Uyu^Ij2Q*Jjrz+6g`ibL@Cu=qt@n zP%2++eUI-4^A5EIQ?E0hb$Q}`=Cq_#d&>3q%91_K7H1phhm`Jqojj{!&EZ2=A0`=; z$Z9qw^Cv&NUo>My-8otP-riWH4NN<HLc=Zm@5L*c8!&Qz{>ItyrR>0YnZ4zPGupRs zndLD!A9DH2cxHCTDz&Z?G7D5cNPg2<!~Tx5r=07V=A1v`x!)`NEG*r4m_&3Y%ucV0 zKQZ@i^rr{G%0K?hzR9p;iA>_Z7WWmQYfF~ZuDQLVduR7G<_jB3eWOfP`pmnt)5F6j z?M#d7of6;6KEH45E4DBH{&Uj$W9O7rRo|8;w-&kXeBCz9rX+p#inrx&El=$I|FcWq z?cB2u54XHMJzeuq?sfCU?`vh1w%y&W-)Jo_drDmS`4a2zs<N*?JbWE%EYccW_q+Mt z`JR+pZ?3*P#U*y)Z0g)sA1}VX?wV^~`%S9K{x|RB^gk2V>D2G5u-d-<?1V;hm)!kn zcm3Aw`s-Xbd$-xyk253|KG$9Q^7{9rM~{re4lg&YnzQ$}(2MW$yLT>*=FWR(+M@4% zUiHU5{rF`Z@%ze^eP4y2*`V(q@`;hjWsbG@vBMD+KVr`uF0;;SY*?jN@bKB}M#dQh zIgg(unli6DxTk%6*NNs;v!=ExRnE!1&3t^D)$GL^tu?Y@ZwLH!nvua-DDk4Tv&~}( z!|ugjuCi9hREYhM$zZ(0x?mOiotlGk>;5+?6}7JNJGLiXoFTSR-Q`JaXsyKOHA`o@ zzx*|q{Rs2MC9>_g7ugiJDi7ak|JAX@<5Btd_mUAW<U{xJE1VH#-w+sj+V#a_2D9fI znB0#_y?b9Pd1K`bTm5(ccx!JQKK`8Jg0%2|#Y6F7)lJ1w8z)p|7>4}V%wT4l=*+gQ z(&naLU$pO>Idcw8Pk#34u>R@b{<Co#`p)m0b@JWbcB72U9;tlAM|&!ZE`EA*YUO{X z9+~Y5m%rKjMmnx7o}d0=g{6$;qL6hFbI!fZzP!!V)N5{3$JW>7pRQzIR5k_0T-0;B z;|m@dTh{#GnCho}%=PWLC9$VpP0@?&D3vb%_~Uf4^0wFwCCsI__Z@g3xY?q}Wo7hc zo$2ur{cc~iCTNH2-0z7#5c&4k#>CI>I^ItIw{Pa>-TUThv8)c!Ie1mv{K2BK^q|i@ zL2qu)Kk<F?G}j#N4Mk_otjc6(XtDUO516ys>iY8H-?y%;3Y*p3K1tGE^OwxG%+2A! 
z`)lp%X78T1V~516>w4!^WnTv@j0x<M35=`$u6}Os8hg&plYzfECQT}8DGTRMxUs@= zg_f$f$P%ZWi;f)Ocy0DB>8E_;MZ<X>D_)r;a0}FadK<Wl`EN++lvR36Qte;2`i0I9 zxH{`V)B~<^8_!Vtz^i@{Ka6f=U78z_VRq%dTzd3thq>PlJmSe&qo6dEseb#j7fs!+ zHx5nt&;46})`qU5HlB*-xW8PL{jq*k-O5#;4f=z$R_%%ojSs(C+~7FF!>xDYsw!PI zh4;mB7g}d5FHM+{J2~Vl!?UAS`UPD*&vwZibO|!}v(e_py|dgo500tuuG+=Q?fkt{ z<kESrRaG3HCARTsXL&08k!e(UBNxtQR~XD(P$t(|@ZQY*t!A<qW2MrU?XUE|J^%Pe z`H95&1*?oouE_k_XVKO>`}>9G_rB>w>}uTEeeH9|dOuLTd{5qH>b~!#57x=)E^gCy zw=%r3Dl_!clWi;9+l#Y5{5^kqhC!tzJHP5g{rs9E+kW$_TzY%&{Ec0;^2^2aPb^@( zn{j<p)hw&~f48*6`Pa*C{JXFvW!?Vg!DhKPy+1Bu{jubKY0QOXVY94+)%2gAb8lOg zUHzov(&Fa{*Vf$jycyIgX|%yw_x!p^QFi$stMp!ew9CH!{>jF=<ewkDu6y~b;@Q{2 zH}(@eYSu-F`M&bL>Xlhu9<irJxY_+oSBCY0&wux)pTFmP<mgd@_w8Ti-AL8m{^6qd z<;C0H|NK!<_SVv4qtefxWfx>k>{dHT-IlAb)Co>s_h_%<{-XE3YZrYJPP*`6CaBT8 zyK2jmkH4*pvI>9qSx=d8=0LxC@Q*2bW`28ka>Bg>|94JZWS^w;J~ov1`=mb%UqmNf znvuRnW7`5o!JXp5@}D<eNQ`~px4r5sqt2J<J)J8~$jkhYt8jIZI~;bPuB$S8Uh(Ju zTny#R<|zfb(T(~pPi%AgVi<EivfY@oPcW(aelKem!`cwjsQS(Wlh2(FXXh3)=j=JU zR%sKn1-l2Y>5hP^VjYz^{GVCA6@E6@f5d6Jb)$FV`xuU5foG9(6qM#OS{wAgGC#NY z27krHUT4j8Z<jlBcFM>6Exd7KKmQkzi4$-9xX<eodNqDgNwNC2gw#bHJjeHRUEQ$Z zZ}`P?vd1nTkWSKGlAdVK$HknVa^*y2n04a9zvo?-Xm5Lvv8Lk*>#>A+hqkSfvfVS$ z(4Q&TCB?v9+&`8jW_hFNW4ZGu-TFTAe5*_}xqj!Pq3Nn=DIb^E*_72E4>7#{^-`P8 z@2B=9d$$PgoiFS8{M~!EH_7MkIs42?o7mU4HhYQO`+{|U|1WGwQS*tD+o(GufwS`N zvAVKqD@Bfif0d>)P1ltl`KHhJqtWzr8PkWa?dHpcn;3GVLz4gX_b;>l_i=;dnaCY{ zf{7Oz@-kZ#9QH;%t$6)YoAJk!(~>&TJ5^)lH$S_*P9Z$@j>234mKC9EP0rg_1-jfm zXtRMK_S|}DLzVcwUY@`HK7Z7ayZ?8lXI93KtLnR!2sSbF-@R}6)$WV*2j^*fn!Uqv zo^`UHPd`u`zJ$xsVEy~%?epEAf7@!D@%AK-`YyeLHi_ZM!XL|bJrXxaWZHJ%q4XJT zQz`D!x|gTa6604~XHaeITlLHO*~O@a%IaH8`8(~DZ*CM5US!=6e0O8{hN){s4H*k2 zu5uHeeA?h*f&R9+C$e%jo{+nK{rrp>x4T-`WG8ZqE-E|`o9wEy<W5k3-t^CUVoG_3 zFS=?m$J~s`3C$Jvak;m6)%_inGm<x0&Wha-df43R_9O$xpO$KR=ga@<6+C6WQ+oB# zo~3ge|DQhaO}g=aQsSRIc|7M19Xgl0q1EE;V$-tki{#AqubZ{mz43F%*W~bA&JR&+ zwKY9wjqki%_m+LZGPj%Oj>ld<z3#2zgQOh2tmM$SZIwpmNsBh|Ju77W++x?NYr(mZ 
z^R}G&iSz?G*Z&*ktb1PcCeDPZE$O@D^|IhC*`F&mb;~@K+_N{j_*`DV#BS>=oe_0j z3-|2#_~z-C9lvtE$#cyAlvrb4$a(UlS+0hP%>kEyn?bFhu_5K-x*b|dc0cZj9ge;7 z&PVE6@4OntTTUJPZ4tk>>8`rCr|@ZV*8g|u+mc;YFAKH!K4;a-Q;b$y*SdaO#0qNA z>^~Rf`&CQz@|5;vS9GJc89Kc_6KL+6esNQBy?EX4#jYPft&*v8U%Rg<zWw#Agng!_ z^!7K2_ZIQaxw{~#t#)429tHlp7tIb^&D-DpSzzgBp_Q+?8~@*L&Q*~~Y158w5Y|h4 z`<*dgu37f<M*SBLHoiB|4^7LRp4en?bsa}(7*lQk!Fl&LR@Of}KH1=-K*{_6?@nD4 zjQvp8lG`Y>QioIe{s%+8sr8ICf)-conRkCrY^p7JaKr3DXGZR#8=oE7o_e25Iq$6g zXw#NmH(Hm+bN-pzBHaG&{nZ1>g12N3+SDFQp3Nb=YSpjE7At@EjjF3oe683tS?00i znt*i~A&L@D))=3@xX3eQq3A6Ah=nm1OPp5LYJdLJJ9EPfW%o<()ixG?cPx9e>F1MN z<q8(##|I8B30a@CJM8#9JCpxw1NUCOvGsYvD%;!T@6vDg{mr?%uV44}x4?B<)-L&% z?ECAb-`ZKT%nRQsackCpyPE&zR<S|s-i($#6TR#2pJ}~&@8RQx#zDtbjIXURZx7mf zN_BD9)2gj~=gi}GC#D~<as)L&|J6)%{skW2c_LEOlWrh%v|G$W)PCl~Idcwex4-%4 zNkWH(OZzp`9~m!7w&mSF-m^11<JN+pS)W83k8&)ZAEvXXa@iW5j?G?A``7-Rmizn0 zW3l6VY(BWh$^A%PmK*ker}Ev6&*k@aXzu@e@RR*EqoO}dZhi3%F)=))SyPsVUe+qv zy{J&iQS97h{?epN>-TT2_^EcxFXGG%&hF0M-i2jfjkehAHo6{v&tlc_lC{Fksb2Gh zw*G&x%`W@e((U$hxSlHI+r9fZM`O<PjUJ$}p$Z$vqAe0Ne^+{Mj`{xlrFJmu)ipos zi@v_iR5tp}wD9S~$70Kbo2$Re-!0gm(3tFha+mssOFI)9?;O8kR`Jf*&wlD6SEKv> z29pA;wpKVk^Q~yvZa?$TiI3brO0Q*2njAc1bAfEJ*t*64>(xKrQ5JW-TeCakVa*x2 z`?J5Z`97Up$UVd8G5@4V!85j3*nSpU|NJa}{liZ7bNw^^rq18p)?-syS@hh=B_Q<M z+kTl>{?)duMr*-CZs+&UmC<GEZ+)=MaI^Rt?^qte&cOTnPmb=Jds$|6%)=LPE3>b? 
z_2Au^HuaQf|F-zub=M8It_|5Iv9svtq&ah()?42y@V;K$x3YGE&CcS&|7+Lo{D1A5 zikfWNpCwCcY)aUY)pV3BEnDsMdzQIQK4p?!Qg!6p&c}~;JU<q8r#M*~<l+yfjqIeX z<_10%pEPC8p~>s(yt~z=TCV>onR#n&*fx#qYj0C7UwiSJEC2RJ`_lCXHZ5ILE#Um; z=v`g#0OQZ_hes2{B){%GE1nawp6Pm7rohH?TW)4cm}T*SlH)1U+be=TzUaE$^|X5# z*Cr3`Qsc!rD-S(6FMhtU+4J}vJ3YO&Q=(m)bDoNYt+A@Be*Uzte{u2eTP1n%vnM_l zFYxUD9vFJ;&jyWE5<EilwWo34y7H*WY%8d|kX<j!aU-I=`{0D-Mk0EBE(P^RKBd;E zocsLV-DMtQT=3Q8->ozD&EIoFX@Z@UrS}<5M;AR7zST>Q{SH@9;%&IK@ajdSPfQ>7 zMAtmF?CbEzUa(>n-+8CIzZ~{vNMG(g@$^7h=-iT;wSVp_9M@6!X?-So&pdnc8@s<9 zTYY}nbKSMd|E_=Ex7b>%<&$`OM{n=KMA_N=-w!^&Y*Ap)nXI~H?y2L>%a?OXr?&>& zsCse2QpTp_&GU0T-PTt;MROu=?y;W3_x##|sHee<H#Y0n{+^XSg(*ibS%cZf;;`FU z-@pG~g?hWKdHnIu=^afcZrb%t-4mfUquz==Sz71oz5Sh5H^iJ3<&J$@`sccml5&jP zoBCN%H@?|ME;P%0?`<J2aO1l1a!|x@aP59D&FJBkm4Rla>kCgzmUuE%eV2k#&h3r! z+2_ej)SD`E<JpD(vY)b#{EM$!!)<7I=-uA-9q&IrR8d{NWcA`zd+aT0e=nEY)lgIU zIC6WTIKSQ3(j&#qPrduLc~!h=Ir=i_M(dB_!|OuM$IO*VRl4@`+wr4;=XYnGzIWwA z`CO^g%Tw=P`YOZqWc$uZ=l|W`QBt-ha=UTq>v^DHcs%uMioTMNveL_aJa<1j&D?VK z5=*3^(Hv<7C06CseFeG7YNFy3?<ffwHYq7px~SY`d9db&(28oWu+4=oR`XYdg<mb% z-)AIc{+#uUkdlz}OR0WQ!Om{8WAz*N>u^6f^Qz0^o`c&YkEfEh@|8we)q5<aJy&tM zz2f`r*l4%&UTwl>m>&npdLI>E`Cq42EaopyZlUy=<%?D^hkTv5p~`UPUUL_f!?`L( zlACKJ?{W)vo_}J+)#33(*U+POnbNlM+{-V`%+CqV|J?Jgv0D9Dqj+;<(vynW`Vm`$ zIJHtY30Z8pRPpLe=*w-smdg`Wt71|++3rc3Ocjfbd%oYfF7?)%tA8v~E%WPs7@yUz zn>(-H{ac0kir{(HyS6htn4D;waqXumNPT?z+O$Vk?n{2SsJ~^`7W?h?e_VHMdwN=A z;rYGKcGf(<tZB0EK`L0`vh^vy-k9=8*-eh~uRoi~E5H2q-ubR~vNxXgeILB=qVSi> zM3eZwd8hxVPCNMYu>P{k-%go>x|m11#U3ou_^lIHd))o!*J(Ey7=*PvT^vK^Tzg?C zSD7br*zK%mti`938zkqHAC*>7Q48J?5U2NYMjiKpB-c;H*LkNkW*yo7dFP7XyK5|~ z^o*aac{ta9`Gqx~vvcn6loxz>GA{CD!!yf^9Ug(x8I$!l__&01cxZ;3iQInionP># zhEgokTRD-5-(%al)+&9P-QjXqAjl=IuvS-iVyse8c*M_GrA>mTtM^2@tSdkMx%-55 zMAz0-rA<*Tt9F9s57cv%U&fxBR}mf->Y}#e-#>wgCmV}-4+K3sD?X7|>5@U$+5V0b zY&xv>>^8p9xVUb;*Nvac*iM;sr#^CUxhF7D+$BY5L##`k2zPJmD>Jc)Q}cMzI!_oY zee#@AEG9NlF@o>uS*1^FJ4E^e{9RHOcFd8s5PSWuRQvmP{R37BYdhBP$o5|dXBV6) 
zQI@l)ql9D5>h&FOSgv)SxRmku^{egNG4Im^UkYked%DC4o?fwu@wSZZo?1o0pAC!8 zzn6a$B-C|6^4%^o=H0GObUQySe)#@DPE2#^gPRIUXYYPgRJvpvu)eQDhxd<du!~&5 z_5HpsYOHqE%ezkG3GU?marx%|ynqEAC+b~N#LdczU7xIRxLNakX4K0ZDTWY6JF$tT zN}oDbf1mfdeqYBL@rfPPyE{+tzcb!1@q16brqWZUr@SsHd}Z^?x=&ng+{Etsr1!x6 z?TND|<_k<bxNf~%<K?4PyUn^!Bnj?RIx{Ee-RG@GZCCd))m|5xD0ySq!`-e=A`1`s zxa`_pT-&j?TaNR!xS-|v-kU-R>pN<68|5XHg8W^UB}P_W>o{TFxGDQ?@rG+R>-JmA zsVn{6aQ4OZ?h`VCD<w0;Kh~-!P474nlYZJwu=3h@$uljb(&bCrRE<AzwY|8pOHIDU zu6N25<(|14X5@u0@!Wjs_xc#`uUbnh-YhlrnZLK~<>u>4UO(Ql>EiR<dq19e8nQBC zj-7r_+O<FS!JC^{Gy`>tTo%2rmDPw}#$hWjf9{U!>z!-Mt1fbEdu1MEWq0t|)4d;_ zeHXkvRU@n+@%=rC(%o``iI=DC{Zjq9z2oW8t|#dy4oENJdY4{R?i5=2^R4{Pcgt04 zz6ak5Spw>dFVBye|L~jHy!t8T8++t=LrZ6NOks5TyzJM?SD{Ku2U-uLD}8ES_3pS) z-<^ikf*nGqt^cr0`==w++|V9sdbBq4ua?r|kgxn3za*b$x}IEF_;|yXGKTYsHw~gz zy|Pg+3%bKyXEE*AI%Ba%((m3>h<b1S-o0uYtH8vrclVz~%)7TrPc%kx(fQMI+<R87 zI;j-IIzv_I{_3nHzH6%)gAbj5Zqd5f{?%v3>xnPV*BuTzdzU}veaorm5!-siH?C1! z+%SLf4ReMQy-z>cvhW-+%iE_IadPPjY2GgfHXUtft^3TRII(8`s&itW7u@BkX-bXf zR2P4FcqOZ)RomUWm9vs(H{56A5uW%ePx)oYoDY(Xtr_<ZJ2k|2p0Ej>`})``H=Q$b zxsN|iZ!~Tw4k=yOF@=S1=f*#5-t(^udbgSj?YP=|xzI7^U84AcwpDUMpVzhT+46Ja zZ(${|YB48~BkSHP`NY&T)XJ?t5chO>!p!P(hm3yL^!7ey5ag_VW|cTIbsk$A`>~w@ zW%7EA(^k9^6tv{8xRCp#uVc&MAfB+Wt15=)Lrn6c9N+BRCG-2D0m}}tij$p74R54> zwOt_dp}FHs;MM#)6;FzvuW8JkQOEgh-44#%8MZ&}Wia1qTAHzkrJ{53&Ax(My@T`a z9^d$UwbZoUGz)`sUjt9~-Yj}!EXp6#mil=ov-d>FbMg_-7Rf3Fu~gSpF`e(IOmN?w z{eH<RYu6>KcCT|g&>nGY$@;mv3q@jVH}=@`8eY79`)i=TOH_j8mFD-0Bfg)j6@9U6 zRq);WPxnfkY1#W*C}_LY%c8owKCiduhuUtfyLjlO?HfbGL(b*f9xnQpZdCKeX4;w? zXC{fxJ~>r;+9}blV#nFD0*!N8Zhhag)O7Bxc_+h@pKYr6sWxrR4WFDfx<W5+>{1Jl z7us99U0dq%y3|s({uw79iC+5_;AFIXs;<-2F4Y|y?#`TZdy7_K;mf(lvg@j6aZa9Y zSorGwn#ldlpm_y_`aG?<VLioCVUMP*d+u`fG|z^lQ;&US?3vRU9KNCIE}vQMP49m- zA49CxO8A~zbt3n=xi>GXyHfbNh+lu7C+X#st0k-Jq}_Y;HRsNz_NeqJ%kIrgeOc!! 
z>@{~*;uXOL_xG9pPDw@6#CBHvlR9_0K=p#<lq}!S=9Sscto$8|-<)=Bm^Gt=amKn; z$uaTG8}A%qRrWnp?zC>zjN>aUo~_Mn*;})lDcQM}(esA4aQL?ctIiew%_)Dry1RAn zyF)f|37Y5i4#h3{-S|xDlF;eHyN_MH+2478wM)vBRdTTxr`&tisubjSW^tI*=LIIJ zMQxLr9gNFsgPERpACU;1`)b`|Zl4d0y}7J88$Fo}e9vf4d&m6x-QBBsHBQBL#{_M^ zzdwE;M{wuq&YyV)(i>YfKHm|K3VzeQe@@2~&T6?$PUhvdA8+j0bH5?!z{~@4Htc%4 z(WEGUb*`a-mrGhw{Fj9%k4_FP&0%}KTWm*QsJK&>ny|~0oCP*&FPydSswJF1UMSpr z<8uhljwlzmRj>TYbyo9~-i&dQTeNY~wd$YCs-JD|J~4epfJ{T@to+cg;n%;hzHxW8 z?W?o1554L-^F|!o4kwpmEkWjK2htg%_aE$Z)NxJaG7zn9V^m8@{_7hdFV0Zx)ID(y zyMc}HnW)ge2d38dRvNF~)#H$G{s>F>KcR2Qixj^}u3_6!cK!#?#+p0Rlad79?EO5g zZQB+O=g{k2m3OR}O%2vpXY?g`zq$K$$||{%ug+$%&9(2^S$9Q0{r9%5^Aytzhq)X! ziekDPd;8akOuTsT`@{|z7CrHabMx-}|B_@NoE54qYuKM>!X<j<@MQkD37LCeg?e53 zzK^f|=DJl=y*|9K4GeBb^LRO_^+l=s?EP2X{W>jO{P$_E@p_rYNvd3zzFul8*=v%O zqVByVBgisq+Ke><x$-XO3zu3vyU6)}5}%<Y_wzG{oHlmOXjgSHD81v?S7mpp(^2rJ zhz0MArl&e~5-(PpG+xPUG}}3+cj6oE?pnrHbx!`Bi{>Ax`@5h2iSWehAzw??z4DK^ zJaPEG^qa$yIXjg;8C~G|At>C$erTQjiOUDxM%;VEr1WXms<!9Hc?IqA6wRv*6IQQc z`<@on*14*{EAjDN(a%mTGjx=8hM4wVcMc8jIN|ibXyb)Cah-N!zZ#vxE=QDf>`S_K zUrQEkPBqjI_z-@_MajQhi&2N`chbD`f$I%!Y!(-MK2giz%k#$^0`pkxs*--~Z&;-# zJ~7)s|L$xhBev>!O1;r4>!S-E{Ze}KIl`p*(}iQZRSoiEPV~ttZJMXkH#ghZ*X13{ zJC=Kb69s3qTihxWxH#|jrWwaAZ0;s5`5nYFeb>dC3zRk~-Kq@vx}oC@Z}xVLfPL;R z*AkmvF>JeJF~hsm?pY+Sq4g{Qmf38LmFDhD+8bYLE;Q8T)#2tmXy7IM*EUM*a|oNQ z!+oa8kkWM>cUbQ5?s<F9{rm1O6Js0L8|J&F=w7%ET4_?Obfc+NJeu)f{FV-n;4_;q zU22_i>VvqgmQs*d^~R~8SNA4;{uRd)68ScV<GlK89kcZHqK*0t$D?%4+%%pfey-9o zjwMcHqS^s{B_pTM>#JUAoZ$|Y`F*rbTWlHMtr!OGlUIXIoA-3IaG(G1`786UPLE`t zDZK@jrJGM_E!ff#5^!}w`WDu2ts>{tbk2OuQ=7M0VSd6wJ4Ue;Pn+02AIS~AnyqO1 zL{ztCgIu*<LFwm2xhVqK8#kn!*SWD-E<;19a8=a}*=)C4GG_v}UkJQfn^Y_uohbKA zA=@yp%6*1~#k(CFUYF~w`C<9Zd9i+CGONq2_iu9gs?MKd6Kcqvv4d0gx88^U3(po` z-@TtvVB)76{c9h57PBksoRPtO%Z^DxQE9oAbydQ=ugQtJVto6}x&pX1adDfrzd6@k z`ISxf@XuK+x27LjX1wO`k%U6Qm1|Zle7;a}gW3hNRRSe83^uYKeD+CeL^EFzo481j z<#U$L2iGLs56!FWq*ofesWp+QmbYC#&HJyP;7)Pw-2E@bCx8}j^>GXCWBnpJvHNJ) 
z!MW;6t{bOk7cN-Y5pulcR{Y)f8R}{=x4J!8bK;^4)*Xy?kQRKYYF4;!mCRg`e{2_m zUDk<3hnDwsObNYubjI<Qx`LSxbMkeR#2GqHoK*@s+d0+g#*?>ml`h@-8_w}r>*+xo zhKDi-_D!+w^oZQZ`Elu@*aaPTRFtmX{mGT|aOs)Px@-kZ1tMZ%uT{)m?unYRtNrc{ zTV*Bjov+txD#aRG92Iv+&Z&ERyzy~%<EbQWnODsIjO~}r+0R^Fq*S!KV@~7Qo7`1< zelg#5eIh2Psi*YR<%wVChFwCRb7WcK#3pw6<%Bb{r5Er%+{0+2qBMQ?^TkVV>^XQd z!s37K_cI~_6PKO)T<p_vB30?reT~$!N5u}#X}bIU!Dkab?t^9mv*(w4x9I7W&)>@y zvE%b%gY#FLm4f1PK3<!8V~<nAe<oR%CvF=nv)40j4k=~4`19u6^?e;v1SW1%x}^0W z@vQfg^G<0!#}BP%c739=(babCQQak+$r=S+9eb3O-fpaxwe@yPdDVI1u2PVs@Vd_% zE_^o93HiI}_nY#=PFDrL>+~FwS}?U^iqOQ@8+Kk&tUFjYW#8j>o`M^Ho=f=t*hjGP z&R%^jmUa8>)n4t|X(sCWq^u$1y>j?_wT<!*e{PgF)7}eO6Q<tfzVYPE^9c(Nu`^uR z=Co71dSY!`t0`OZAwC}mmcwpcwKtbUKU%k{-Jt)<-5CrK{hWt-*VUM9<9Ooq=BOQu z@S=pI$GSQ)dw972w{7FxDZ<UYpI7Io+J^Rh9@+`_&ON=+^7-}Mm}3zS46na4_NiI= zvCC?<NVJFGtQT^RZ>{BAFKuyO{9VoVB;oze@kRR+<m6+1-g4OOyd+kkNY_C4v)8+W zHmr()GvB;wTlY~hXU#l@um?_$0^e{~>i<qw{J*gAvGK-2xgOnw3hv$a74^@1-T9+# zu6I50`)lp=*XfGVbEki@DwqAd(X#q#$AgK1SDAO5{Cs0d3wOh}RZ>xf->oxkV<%Ry z{oeGx+m^YX$?)7c?%3boH~#*sBKD6>Sl{uXN#_guvR9MJ9=cDE@HwJ;Y)_MYVM*Ps z?+4ruEWc5+??1!C>Mq;nyRRSKusO+o_*DF%cgMRAc(1Y&aR{wHIPZ1z#^;|63JMq8 zExFXa=K98u6|BGCJUP_z-zbNteZ{L28rLn^qn`*RS?lqvUDbKRZpz6U#isXT^IybH z__eNJ{z0)D{Rg8rJTEtPpa1d0Zzi*}h1r~Y7oRJ-Udv_n-119J>A__ydyYGOFJGON zVfs|<QrlhSk2Yr0_9vY`fA`(3z^fuT^Ij{yx%r>Bx-Rf)Xdch3-H-1-*mG#ruTJUy z=nc<PSln%o72Z&qp?ahE6Yn=WuY=E${0`YAr#AdnDVh88(=q;+wc)q3HyPbO`}pG% zAG7?s-Y=_a14HMZeqYbSdFH$D?a4WYVn$W=)w-v5cI5gO_pdr<f9|gyb6)V(lGR7& zmKT5C|It|HThIK~&bhDjbY7iguuliAI4okTjroydy>0D@luK<Tw*PJJ8b!*!vozZ$ zIx*!;_g$WfL)-_~JOr;ue8MyDpi{tdroGJFHa0hpaEdgUC(pa@@rLQ&TI<i{$F@nG z;$HKI`?mPg2=Efec`^;rS2oXhp>*)GzGLX}lz?e1x(+|DKYS)|{!E_rKk<*;9yZ_+ zZ=A~`mvkMRGDW$XPuA;P4ts9@@yX3MciT7KxG^JRHd{4lg=qbR5W^W$<``AqdUKV1 z(`AYN<HbeB=?|?wJ?-T`|NSLsdFcPdro=~<pT*Xn7u~w-_~Vlw&&>M%d}2$>la)7f z+d<1L`{W%Bcf2@oaLEpP^Eao=l5OP9pFBHd=4w6_dB2$x@0L}7Ec!7~i!IbtGk)HW zslsZ%Ydvh`{Hw~|+aCFI^mXy$Pi>&JozL$(&boT!H@})))qml`!rR~8m>>JC!e-v& 
zDc16`(<Fq2jXzdA5ByqTBj^8Zk{i#eSJzLT)LfkZ@@u72ndFm?6@GSZwfiSLOn$B^ zZ!@LD8MeCBdsi(RzpBc8v${)@oSWT$cuY9{xN+_6?}dkJn}sG#;=8|U)$Gl;!Qm(q zf2q?k^zO_F6+dpt*|MwNy_<gHAv62rO^5aRj~5FW{Aza%$^V$+lyvFg!&Sd3%Wi{4 zyyrdcU*UEyJ<4U(v(wW5T`wN`nZ+k{!PbBEve18tmu=+y%QmLJSA6&Gcj~1L7mFuP zp2T;5-KyqIx79%FQs+o4>T+Zd?Ck8E$exsxG;ejx!ib>DY*UNOSA{Ev|Go8eWyHQ0 z_8H5LKen9s=H~PRdyc-Ixw~NQKdbDojW@Zk9ql&wdEnrZ*P*4sWw)P$VoTV)Ey-Z( zBoEI8o<2P$S`iBVr;jW-{8dP368GGS&o*-B?HzM+e2N<X><m{^Q}g%qu(6Syv2+#p hrrUa8_f-6oKRq)kS~8!piGhKE!PC{xWt~$(697s(al-%r literal 0 HcmV?d00001 diff --git a/src/cuda/ideasForCommunication.txt b/src/cuda/ideasForCommunication.txt new file mode 100644 index 000000000..44f43cc5c --- /dev/null +++ b/src/cuda/ideasForCommunication.txt @@ -0,0 +1,40 @@ + +HybridUniformNeighborScheme scheme ( blockstorage, blockSelectorForGPU); + +# Add field that is communicated +scheme.addPackInfo( FieldPackInfo(cpuFieldID), GPUFieldPackInfo(gpuFieldID) ); +# possibly add further fields which are sent in same message +scheme.addPackInfo( .... ) + + +GPUFieldPackInfo Interface + + MemberVariable: the GPUField + - pack ( direction, device_ptr, offset ) # -> calls packing kernel that copies the slice before ghost layer to the device_ptr buffer + - getConstantByteSizeForDirection( d ) # returns the required buffer size to pack for a neighbor in the given direction + - unpack (direction, device_ptr, offset ) + + +# Pseudo code for scheme.startCommunication +startCommunication: + 1) Pack everything into buffers + For all blocks + For all neighbors of current block + if currentBlock=gpu: + if neighbor is localcpu + -> call localToCPU + if neighbor is localgpu + -> + if neighbor is somewhere else + -> call pack on gpu + else currentBlock=cpu: + similar to above + + +How to send on GPU? + - use blocking send on GPU and different streams for all blocks + - use ISend ... how to manage the exchange of MPI_Request and the wait? 
+ + + + + diff --git a/src/field/Field.h b/src/field/Field.h index 38c402dc5..c7785e80e 100644 --- a/src/field/Field.h +++ b/src/field/Field.h @@ -181,17 +181,6 @@ namespace field { //**************************************************************************************************************** - //** Low Level Functions ********************************************************************************** - /*! \name Low Level Functions - only use when absolutely necessary */ - //@{ - T * data(); - const T * data() const; - - shared_ptr< FieldAllocator<T> > getAllocator() const; - //@} - //**************************************************************************************************************** - - //** Equality Checks ********************************************************************************************* /*! \name Equality Checks */ //@{ @@ -297,6 +286,17 @@ namespace field { //**************************************************************************************************************** + //** Pointer to internal memory - use with care! ********************************************************** + /*! \name Pointer to internal memory - use with care! 
*/ + //@{ + T * data() { return values_; } + const T * data() const { return values_; } + T * dataInner() { return valuesWithOffset_; } + const T * dataInner() const { return valuesWithOffset_; } + + shared_ptr< FieldAllocator<T> > getAllocator() const; + //@} + //**************************************************************************************************************** //** Static cached end iterators ********************************************************************************* diff --git a/src/field/Field.impl.h b/src/field/Field.impl.h index 2feeefaf0..39b9b227d 100644 --- a/src/field/Field.impl.h +++ b/src/field/Field.impl.h @@ -1136,32 +1136,6 @@ namespace field { //=================================================================================================================== - //******************************************************************************************************************* - /*! Returns a pointer to the memory region where field data is stored - * - * For additional information about memory layout, have a look at the xOff() and xStride() functions - * If you store a pointer to the internal field data, you have to make sure the data is not freed by the field class. - * See documentation of getAllocator() for this - */ - //******************************************************************************************************************* - template<typename T, uint_t fSize_ > - T * Field<T,fSize_>::data() - { - return values_; - } - - //******************************************************************************************************************* - /*! 
Returns a pointer to the memory region where field data is stored - * - * see documentation of nonconst version - */ - //******************************************************************************************************************* - template<typename T, uint_t fSize_ > - const T * Field<T,fSize_>::data() const - { - return values_; - } - //******************************************************************************************************************* /*! Returns internal data allocator * diff --git a/src/field/Layout.h b/src/field/Layout.h new file mode 100644 index 000000000..ed78ba0e9 --- /dev/null +++ b/src/field/Layout.h @@ -0,0 +1,43 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file Layout.h +//! \ingroup field +//! 
\author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#pragma once + + +namespace walberla { +namespace field { + + + /** + * \brief Layout for field ( + * \ingroup field + */ + enum Layout { + fzyx = 0, //!< Value-sorted data layout (f should be outermost loop) + zyxf = 1 //!< Cell-sorted data layout, (f should be innermost loop) + }; + + + +} // namespace field +} // namespace walberla + + diff --git a/src/field/communication/MPIDatatypes.h b/src/field/communication/MPIDatatypes.h index 0aaff22d4..3e1fe5f88 100644 --- a/src/field/communication/MPIDatatypes.h +++ b/src/field/communication/MPIDatatypes.h @@ -65,8 +65,8 @@ namespace communication { * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatype( const Field< T, fSize > & field ); +template<typename Field_T> +MPI_Datatype mpiDatatype( const Field_T & field ); @@ -92,8 +92,8 @@ MPI_Datatype mpiDatatype( const Field< T, fSize > & field ); * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSlice( const Field< T, fSize > & field, +template<typename Field_T> +MPI_Datatype mpiDatatypeSlice( const Field_T & field, const cell_idx_t xBeg, const cell_idx_t yBeg, const cell_idx_t zBeg, const cell_idx_t fBeg, const cell_idx_t xEnd, const cell_idx_t yEnd, const cell_idx_t zEnd, const cell_idx_t fEnd ); @@ -115,8 +115,8 @@ MPI_Datatype mpiDatatypeSlice( const Field< T, fSize > & field, * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype 
mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, cell_idx_t f = 0 ); +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, cell_idx_t f = 0 ); @@ -137,8 +137,8 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, const cell_idx_t fBeg, const cell_idx_t fEnd ); +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, const cell_idx_t fBeg, const cell_idx_t fEnd ); @@ -158,8 +158,8 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, const std::set<cell_idx_t> & fs ); +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, const std::set<cell_idx_t> & fs ); @@ -176,8 +176,8 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & field ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField_T & field ); @@ -196,8 +196,8 @@ MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize 
> & fiel * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & field, const uint_t numGhostLayers ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField_T & field, const uint_t numGhostLayers ); @@ -217,8 +217,8 @@ MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & fiel * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice = false ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice = false ); @@ -238,8 +238,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & fiel * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & field, const uint_t thickness, const stencil::Direction dir, const bool fullSlice = false ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField_T & field, const uint_t thickness, const stencil::Direction dir, const bool fullSlice = false ); @@ -259,8 +259,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & fiel * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, 
uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice = false, const cell_idx_t f = 0 ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice = false, const cell_idx_t f = 0 ); //====================================================================================================================== @@ -280,8 +280,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice, const cell_idx_t fBeg, const cell_idx_t fEnd ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice, const cell_idx_t fBeg, const cell_idx_t fEnd ); //====================================================================================================================== @@ -300,8 +300,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice, const std::set<cell_idx_t> & fs ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice, const std::set<cell_idx_t> & fs ); 
//====================================================================================================================== @@ -320,8 +320,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness = 1, const bool fullSlice = false ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness = 1, const bool fullSlice = false ); //====================================================================================================================== @@ -341,8 +341,8 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField< T, fSize > * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness = 1, const cell_idx_t f = 0, const bool fullSlice = false ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness = 1, const cell_idx_t f = 0, const bool fullSlice = false ); //====================================================================================================================== @@ -363,8 +363,8 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSiz * \returns The MPI datatype */ 
//====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness, const cell_idx_t fBeg, const cell_idx_t fEnd, const bool fullSlice ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness, const cell_idx_t fBeg, const cell_idx_t fEnd, const bool fullSlice ); //====================================================================================================================== @@ -384,12 +384,12 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSiz * \returns The MPI datatype */ //====================================================================================================================== -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness, const std::set<cell_idx_t> & fs, const bool fullSlice ); +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness, const std::set<cell_idx_t> & fs, const bool fullSlice ); } // namespace communication } // namespace field } // namespace walberla -#include "MPIDatatypes.impl.h" \ No newline at end of file +#include "MPIDatatypes.impl.h" diff --git a/src/field/communication/MPIDatatypes.impl.h b/src/field/communication/MPIDatatypes.impl.h index 3a801a671..1bb4bf3a6 100644 --- a/src/field/communication/MPIDatatypes.impl.h +++ b/src/field/communication/MPIDatatypes.impl.h @@ -28,11 +28,12 @@ namespace field { namespace communication { -template<typename T, uint_t fSize> -MPI_Datatype 
mpiDatatypeSlice( const Field< T, fSize > & field, +template<typename Field_T> +MPI_Datatype mpiDatatypeSlice( const Field_T & field, const cell_idx_t xBeg, const cell_idx_t yBeg, const cell_idx_t zBeg, const cell_idx_t fBeg, const cell_idx_t xEnd, const cell_idx_t yEnd, const cell_idx_t zEnd, const cell_idx_t fEnd ) { + typedef typename Field_T::value_type T; int sizes[4]; int subsizes[4]; int starts[4]; @@ -101,8 +102,8 @@ MPI_Datatype mpiDatatypeSlice( const Field< T, fSize > & field, -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatype( const Field< T, fSize > & field ) +template<typename Field_T> +MPI_Datatype mpiDatatype( const Field_T & field ) { return mpiDatatypeSlice( field, cell_idx_t( 0 ), cell_idx_t( 0 ), cell_idx_t( 0 ), cell_idx_t( 0 ), @@ -111,8 +112,8 @@ MPI_Datatype mpiDatatype( const Field< T, fSize > & field ) } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, cell_idx_t f /*= 0*/ ) +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, cell_idx_t f /*= 0*/ ) { return mpiDatatypeSlice( field, interval.xMin(), interval.yMin(), interval.zMin(), f, @@ -120,8 +121,8 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInterval & interval, const cell_idx_t fBeg, const cell_idx_t fEnd ) +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, const cell_idx_t fBeg, const cell_idx_t fEnd ) { return mpiDatatypeSlice( field, interval.xMin(), interval.yMin(), interval.zMin(), fBeg, @@ -129,9 +130,11 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const 
CellInterval & interval, const std::set<cell_idx_t> & fs ) +template<typename Field_T> +MPI_Datatype mpiDatatypeSliceXYZ( const Field_T & field, const CellInterval & interval, const std::set<cell_idx_t> & fs ) { + typedef typename Field_T::value_type T; + MPI_Datatype newType = MPI_DATATYPE_NULL; int sizes[3]; @@ -206,14 +209,14 @@ MPI_Datatype mpiDatatypeSliceXYZ( const Field< T, fSize > & field, const CellInt } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & field ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField_T & field ) { return mpiDatatypeWithGhostLayer( field, field.nrOfGhostLayers() ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & field, const uint_t numGhostLayers ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField_T & field, const uint_t numGhostLayers ) { const cell_idx_t xBeg = - cell_idx_c( numGhostLayers ); const cell_idx_t yBeg = - cell_idx_c( numGhostLayers ); @@ -231,14 +234,14 @@ MPI_Datatype mpiDatatypeWithGhostLayer( const GhostLayerField< T, fSize > & fiel } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice /*= false*/ ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice /*= false*/ ) { return mpiDatatypeGhostLayerOnly( field, field.nrOfGhostLayers(), dir, fullSlice ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & field, const uint_t thickness, const stencil::Direction dir, const bool fullSlice /*= false*/ ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField_T & 
field, const uint_t thickness, const stencil::Direction dir, const bool fullSlice /*= false*/ ) { CellInterval ci; field.getGhostRegion( dir, ci, cell_idx_c( thickness ), fullSlice ); @@ -250,8 +253,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnly( const GhostLayerField< T, fSize > & fiel } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice /*= false*/, const cell_idx_t f /*= 0*/ ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice /*= false*/, const cell_idx_t f /*= 0*/ ) { CellInterval ci; field.getGhostRegion( dir, ci, cell_idx_c( field.nrOfGhostLayers() ), fullSlice ); @@ -259,8 +262,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f return mpiDatatypeSliceXYZ( field, ci, f ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice, const cell_idx_t fBeg, const cell_idx_t fEnd ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const bool fullSlice, const cell_idx_t fBeg, const cell_idx_t fEnd ) { CellInterval ci; field.getGhostRegion( dir, ci, cell_idx_c( field.nrOfGhostLayers() ), fullSlice ); @@ -268,8 +271,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f return mpiDatatypeSliceXYZ( field, ci, fBeg, fEnd ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const bool fullSlice, const std::set<cell_idx_t> & fs ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField_T & field, const 
stencil::Direction dir, const bool fullSlice, const std::set<cell_idx_t> & fs ) { CellInterval ci; field.getGhostRegion( dir, ci, cell_idx_c( field.nrOfGhostLayers() ), fullSlice ); @@ -277,8 +280,8 @@ MPI_Datatype mpiDatatypeGhostLayerOnlyXYZ( const GhostLayerField< T, fSize > & f return mpiDatatypeSliceXYZ( field, ci, fs ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness /*= 1*/, const bool fullSlice /*= false*/ ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness /*= 1*/, const bool fullSlice /*= false*/ ) { CellInterval ci; field.getSliceBeforeGhostLayer( dir, ci, cell_idx_c( thickness ), fullSlice ); @@ -289,8 +292,8 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayer( const GhostLayerField< T, fSize > return mpiDatatypeSliceXYZ( field, ci, fBeg, fEnd ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness /*= 1*/, const cell_idx_t f /*= 0*/, const bool fullSlice /*= false*/ ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness /*= 1*/, const cell_idx_t f /*= 0*/, const bool fullSlice /*= false*/ ) { CellInterval ci; field.getSliceBeforeGhostLayer( dir, ci, cell_idx_c( thickness ), fullSlice ); @@ -298,8 +301,8 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSiz return mpiDatatypeSliceXYZ( field, ci, f ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness, const cell_idx_t fBeg, const cell_idx_t 
fEnd, const bool fullSlice ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness, const cell_idx_t fBeg, const cell_idx_t fEnd, const bool fullSlice ) { CellInterval ci; field.getSliceBeforeGhostLayer( dir, ci, cell_idx_c( thickness ), fullSlice ); @@ -307,8 +310,8 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSiz return mpiDatatypeSliceXYZ( field, ci, fBeg, fEnd ); } -template<typename T, uint_t fSize> -MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSize > & field, const stencil::Direction dir, const uint_t thickness, const std::set<cell_idx_t> & fs, const bool fullSlice ) +template<typename GhostLayerField_T> +MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField_T & field, const stencil::Direction dir, const uint_t thickness, const std::set<cell_idx_t> & fs, const bool fullSlice ) { CellInterval ci; field.getSliceBeforeGhostLayer( dir, ci, cell_idx_c( thickness ), fullSlice ); @@ -319,4 +322,4 @@ MPI_Datatype mpiDatatypeSliceBeforeGhostlayerXYZ( const GhostLayerField< T, fSiz } // namespace communication } // namespace field -} // namespace walberla \ No newline at end of file +} // namespace walberla diff --git a/src/field/iterators/FieldIterator.h b/src/field/iterators/FieldIterator.h index 6a831fb54..a1c328e7a 100644 --- a/src/field/iterators/FieldIterator.h +++ b/src/field/iterators/FieldIterator.h @@ -26,6 +26,7 @@ #include "core/DataTypes.h" #include "core/cell/Cell.h" #include "core/debug/Debug.h" +#include "field/Layout.h" #include "stencil/Directions.h" @@ -39,16 +40,6 @@ namespace walberla { namespace field { - /** - * \brief Layout for field ( - * \ingroup field - */ - enum Layout { - fzyx = 0, //!< Value-sorted data layout (f should be outermost loop) - zyxf = 1 //!< Cell-sorted data layout, (f should be innermost loop) - }; - - template<typename T, 
uint_t fSize_> class Field; // forward for friend declaration diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 00c3bf12b..55b690d6c 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -5,6 +5,7 @@ include_directories( ${walberla_BINARY_DIR}/src ) # for generated headers add_subdirectory( blockforest ) add_subdirectory( boundary ) add_subdirectory( core ) +add_subdirectory( cuda ) add_subdirectory( domain_decomposition ) add_subdirectory( fft ) add_subdirectory( field ) diff --git a/tests/cuda/CMakeLists.txt b/tests/cuda/CMakeLists.txt new file mode 100644 index 000000000..89d914803 --- /dev/null +++ b/tests/cuda/CMakeLists.txt @@ -0,0 +1,15 @@ +################################################################################################### +# +# Tests for cuda +# +################################################################################################### + +waLBerla_compile_test( FILES FieldTransferTest ) +waLBerla_execute_test( NAME FieldTransferTest ) + +waLBerla_compile_test( FILES SimpleKernelTest.cpp Kernels.cu DEPENDS blockforest timeloop gui ) +waLBerla_execute_test( NAME SimpleKernelTest ) + +waLBerla_compile_test( FILES CudaMPI DEPENDS blockforest timeloop gui ) +waLBerla_execute_test( NAME CudaMPI ) + diff --git a/tests/cuda/CudaMPI.cpp b/tests/cuda/CudaMPI.cpp new file mode 100644 index 000000000..ffcb89260 --- /dev/null +++ b/tests/cuda/CudaMPI.cpp @@ -0,0 +1,144 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file CudaMPI.cpp +//! \author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + + +#include "blockforest/Initialization.h" + + +#include "core/debug/TestSubsystem.h" +#include "core/Environment.h" +#include "core/logging/Logging.h" +#include "core/mpi/Datatype.h" + +#include "cuda/GPUField.h" + +#include "field/communication/MPIDatatypes.h" +#include "field/AddToStorage.h" +#include "timeloop/SweepTimeloop.h" + +#include "gui/Gui.h" + + +using namespace walberla; + + +void fullFieldTransfer() +{ + Field<double,4> h_f1 ( 3, 4, 2, 42.0, field::fzyx ); + Field<double,4> h_f2 ( 3, 4, 2, 27.0, field::fzyx ); + + cuda::GPUField<double> d_f ( 3, 4, 2, 4, 8.0, field::fzyx ); + + + // Transfer h_f1 from CPU to GPU d_f + + auto h_f1_datatype = mpi::Datatype ( field::communication::mpiDatatype( h_f1 ) ); + auto h_f2_datatype = mpi::Datatype ( field::communication::mpiDatatype( h_f2 ) ); + auto d_f_datatype = mpi::Datatype ( field::communication::mpiDatatype( d_f ) ); + + WALBERLA_LOG_DEVEL("ISend"); + MPI_Request request1; + MPI_Isend( h_f1.data(), 1, h_f1_datatype, 0, 0, MPI_COMM_WORLD, &request1 ); + + WALBERLA_LOG_DEVEL("IRecv"); + MPI_Request request2; + MPI_Irecv( d_f.data(), 1, d_f_datatype, 0, 0, MPI_COMM_WORLD, &request2 ); + + MPI_Wait( &request1, MPI_STATUS_IGNORE ); + MPI_Wait( &request2, MPI_STATUS_IGNORE ); + + // Transfer GPU field d_f back to CPU into h_f2 + + MPI_Request request3; + WALBERLA_LOG_DEVEL("ISend"); + MPI_Isend( d_f.data(), 1, 
d_f_datatype, 0, 0, MPI_COMM_WORLD , &request3 ); + + MPI_Request request4; + WALBERLA_LOG_DEVEL("IRecv"); + MPI_Irecv( h_f2.data(), 1, h_f2_datatype, 0, 0, MPI_COMM_WORLD, &request4 ); + + MPI_Wait( &request3, MPI_STATUS_IGNORE ); + MPI_Wait( &request4, MPI_STATUS_IGNORE ); + + WALBERLA_CHECK_EQUAL( h_f1, h_f2 ); +} + + +void blockStorageAndGui( int argc, char ** argv ) +{ + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid( + uint_c(1) , uint_c(1), uint_c(1), // number of blocks in x,y,z direction + uint_c(5) , uint_c(7), uint_c(3), // number of cells per block in x,y,z direction + real_c(1), // dx: length of one cell in physical coordinates + false, // one block per process? - "false" means all blocks to one process + true, true, true ); // periodic in x, y and z direction + + typedef GhostLayerField<real_t,1> ScalarField; + BlockDataID cpuFieldID1 = field::addToStorage<ScalarField>( blocks, "CPUField 1", real_c(42), field::fzyx, uint_c(1) ); + BlockDataID cpuFieldID2 = field::addToStorage<ScalarField>( blocks, "CPUField 2", real_c(0), field::fzyx, uint_c(1) ); + + typedef cuda::GPUField<real_t> GPUField; + BlockDataID gpuFieldID = blocks->addStructuredBlockData< GPUField >( + [&] ( IBlock * block, StructuredBlockStorage * const s ) { + return new GPUField( s->getNumberOfXCells(*block), + s->getNumberOfYCells(*block), + s->getNumberOfZCells(*block), + 1 , 1.0); + }, + "GPU Field" ); + + for( auto blockIt = blocks->begin(); blockIt != blocks->end(); ++blockIt ) + { + // get the field stored on the current block + ScalarField * h_f1 = blockIt->getData<ScalarField>( cpuFieldID1 ); + ScalarField * h_f2 = blockIt->getData<ScalarField>( cpuFieldID2 ); + GPUField * d_f = blockIt->getData<GPUField> ( gpuFieldID ); + + auto h_f1_datatype = mpi::Datatype ( field::communication::mpiDatatypeSliceBeforeGhostlayer( *h_f1, stencil::W, 1, true ) ); + auto h_f2_datatype = mpi::Datatype ( field::communication::mpiDatatypeSliceBeforeGhostlayer( *h_f2, stencil::W, 1, true ) ); 
+ auto d_f_datatype = mpi::Datatype ( field::communication::mpiDatatypeSliceBeforeGhostlayer( *d_f , stencil::W, 1, true ) ); + + MPI_Sendrecv( const_cast<double *>( h_f1->data() ), 1, h_f1_datatype, 0, 0, + d_f->data(), 1, d_f_datatype , 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); + + MPI_Sendrecv( d_f->data(), 1, d_f_datatype, 0, 0, + h_f2->data(), 1, h_f2_datatype, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE ); + + + } + + SweepTimeloop timeloop( blocks, 4 ); + GUI gui( timeloop, blocks, argc, argv ); + gui.run(); + +} + + +int main( int argc, char ** argv ) +{ + debug::enterTestMode(); + walberla::Environment walberlaEnv( argc, argv ); + + fullFieldTransfer(); + //blockStorageAndGui(argc, argv); + + + return 0; +} diff --git a/tests/cuda/FieldTransferTest.cpp b/tests/cuda/FieldTransferTest.cpp new file mode 100644 index 000000000..e37768d9d --- /dev/null +++ b/tests/cuda/FieldTransferTest.cpp @@ -0,0 +1,65 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file FieldTransferTest.cpp +//! 
\author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + + +#include "core/debug/TestSubsystem.h" +#include "core/Environment.h" + +#include "field/Field.h" + +#include "cuda/GPUField.h" +#include "cuda/FieldCopy.h" + + +using namespace walberla; + +void simpleTransfer() +{ + Field<double,4> h_f1 ( 16, 20, 30, 42.0, field::fzyx ); + Field<double,4> h_f2 ( 16, 20, 30, 0.0, field::fzyx ); + + + cuda::GPUField<double> d_f ( 16,20,30,4,0, field::fzyx ); + + WALBERLA_CHECK_EQUAL( h_f1.xSize() ,d_f.xSize() ); + WALBERLA_CHECK_EQUAL( h_f1.ySize() ,d_f.ySize() ); + WALBERLA_CHECK_EQUAL( h_f1.zSize() ,d_f.zSize() ); + WALBERLA_CHECK_EQUAL( h_f1.fSize() ,d_f.fSize() ); + WALBERLA_CHECK_EQUAL( h_f1.layout() ,d_f.layout() ); + + + cuda::fieldCpy( d_f, h_f1 ); + cuda::fieldCpy( h_f2, d_f ); + + WALBERLA_CHECK_EQUAL( h_f1, h_f2 ); +} + + + + +int main( int argc, char ** argv ) +{ + debug::enterTestMode(); + walberla::Environment walberlaEnv( argc, argv ); + + simpleTransfer(); + + return 0; +} diff --git a/tests/cuda/Kernels.cu b/tests/cuda/Kernels.cu new file mode 100644 index 000000000..fb4228113 --- /dev/null +++ b/tests/cuda/Kernels.cu @@ -0,0 +1,33 @@ + +#include <iostream> + +#include "cuda/FieldAccessor.h" +#include "cuda/FieldIndexing.h" + +namespace walberla { + + +namespace cuda { + template<typename T> + class GPUField; +} + +__global__ void kernel_double( cuda::FieldAccessor<double> f ) +{ + f.set( blockIdx, threadIdx ); + f.get() *= 2.0; +} + +void kernel_double_field( const cuda::GPUField<double> & field ) +{ + using namespace std; + cuda::FieldIndexing<double> iter = cuda::FieldIndexing<double>::sliceBeforeGhostLayerXYZ( field, 1, stencil::E, true ); + std::cout << "Kernel call dims " + << iter.blockDim().x << "," + << iter.gridDim().x << "," + << iter.gridDim().y << "," + << iter.gridDim().z << endl; + kernel_double<<< iter.gridDim(), iter.blockDim() 
>>> ( iter.gpuAccess() ); +} + +} // namespace walberla diff --git a/tests/cuda/SimpleKernelTest.cpp b/tests/cuda/SimpleKernelTest.cpp new file mode 100644 index 000000000..8313947ed --- /dev/null +++ b/tests/cuda/SimpleKernelTest.cpp @@ -0,0 +1,115 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>. +// +//! \file SimpleKernelTest.cpp +//! 
\author Martin Bauer <martin.bauer@fau.de> +// +//====================================================================================================================== + +#include "cuda/FieldIndexing.h" +#include "blockforest/Initialization.h" + +#include "core/debug/TestSubsystem.h" +#include "core/Environment.h" + +#include "field/GhostLayerField.h" + +#include "cuda/GPUField.h" +#include "cuda/FieldCopy.h" +#include "cuda/Kernel.h" +#include "gui/Gui.h" +#include "timeloop/SweepTimeloop.h" + +using namespace walberla; + +namespace walberla{ +void kernel_double_field( const cuda::GPUField<double> & field ); + +void kernel_double( cuda::FieldAccessor<double> f ); +} + + + +GhostLayerField<real_t,1> * createCPUField( IBlock* const block, StructuredBlockStorage* const storage ) +{ + return new GhostLayerField<real_t,1> ( + storage->getNumberOfXCells( *block ), // number of cells in x direction + storage->getNumberOfYCells( *block ), // number of cells in y direction + storage->getNumberOfZCells( *block ), // number of cells in z direction + 1, // number of ghost layers + real_t(1), // initial value + field::fzyx); +} + +cuda::GPUField<real_t> * createGPUField( IBlock* const block, StructuredBlockStorage* const storage ) +{ + return new cuda::GPUField<real_t> ( + storage->getNumberOfXCells( *block ), // number of cells in x direction + storage->getNumberOfYCells( *block ), // number of cells in y direction + storage->getNumberOfZCells( *block ), // number of cells in z direction + 1, // fSize + 1, // number of ghost layers + field::fzyx ); +} + + +int main( int argc, char ** argv ) +{ + walberla::Environment env( argc, argv ); + debug::enterTestMode(); + + shared_ptr< StructuredBlockForest > blocks = blockforest::createUniformBlockGrid ( + uint_t(1), uint_t(1), uint_t(1), // number of blocks in x,y,z direction + uint_t(14), uint_t(14), uint_t(14), // how many cells per block (x,y,z) + real_c(0.5), // dx: length of one cell in physical coordinates + false, // one 
block per process - "false" means all blocks to one process + false, false, false ); // no periodicity + + + + BlockDataID cpuFieldID = blocks->addStructuredBlockData< GhostLayerField<real_t,1> > ( &createCPUField, "CPUField" ); + + + BlockDataID gpuFieldID = blocks->addStructuredBlockData< cuda::GPUField<real_t> > ( &createGPUField, "GPUField" ); + + for ( auto blockIterator = blocks->begin(); blockIterator != blocks->end(); ++blockIterator ) + { + IBlock & currentBlock = *blockIterator; + + // get the field stored on the current block + auto cpuField = currentBlock.getData< GhostLayerField<real_t,1> > ( cpuFieldID ); + auto gpuField = currentBlock.getData< cuda::GPUField<real_t> > ( gpuFieldID ); + + cuda::fieldCpy( *gpuField, *cpuField ); + + auto myKernel = cuda::make_kernel( &kernel_double ); + auto indexing = cuda::FieldIndexing<double>::sliceBeforeGhostLayerXYZ( *gpuField, 1, stencil::W, true ); + myKernel.addFieldIndexingParam(indexing); + myKernel(); + + cuda::fieldCpy( *cpuField, *gpuField ); + + WALBERLA_ASSERT_EQUAL( cpuField->get(0,0,0), 2 ); + } + + + //SweepTimeloop timeloop ( blocks, uint_t(1) ); + //timeloop.run(); + //GUI gui ( timeloop, blocks, argc, argv ); + //gui.run(); + + + return 0; +} -- GitLab