Skip to content
Snippets Groups Projects
pairs.hpp 20.85 KiB
#include <algorithm>
#include <cmath>
#include <iostream>
#include <memory>
#include <vector>
//---
#ifdef PAIRS_TARGET_CUDA
#   include "devices/cuda.hpp"
#else
#   include "devices/dummy.hpp"
#endif

#include "domain/regular_6d_stencil.hpp"

#pragma once

#ifdef DEBUG
#   include <assert.h>
#   define PAIRS_DEBUG(...)     fprintf(stderr, __VA_ARGS__)
#   define PAIRS_ASSERT(a)      assert(a)
#   define PAIRS_EXCEPTION(a)
#else
#   define PAIRS_DEBUG(...)
#   define PAIRS_ASSERT(a)
#   define PAIRS_EXCEPTION(a)
#endif

#define PAIRS_ERROR(...)        fprintf(stderr, __VA_ARGS__)
#define MIN(a,b)                ((a) < (b) ? (a) : (b))
#define MAX(a,b)                ((a) > (b) ? (a) : (b))

namespace pairs {

typedef int array_t;
typedef int property_t;
typedef int layout_t;

enum PropertyType {
    Prop_Invalid = -1,
    Prop_Integer = 0,
    Prop_Float,
    Prop_Vector
};

enum DataLayout {
    Invalid = -1,
    AoS = 0,
    SoA
};

enum DomainPartitioning {
    DimRanges = 0,
    BoxList,
};

template<typename real_t>
class Vector3 {
private:
    real_t x, y, z;

public:
    Vector3() : x((real_t) 0.0), y((real_t) 0.0), z((real_t) 0.0) {}
    Vector3(real_t v) : x(v), y(v), z(v) {}
    Vector3(real_t x_, real_t y_, real_t z_) : x(x_), y(y_), z(z_) {}
};

class Array {
protected:
    array_t id;
    std::string name;
    void *h_ptr, *d_ptr;
    size_t size;
    bool is_static;

public:
    Array(array_t id_, std::string name_, void *h_ptr_, void *d_ptr_, size_t size_, bool is_static_ = false) :
        id(id_),
        name(name_),
        h_ptr(h_ptr_),
        d_ptr(d_ptr_),
        size(size_),
        is_static(is_static_) {

        PAIRS_ASSERT(size_ > 0);
    }

    property_t getId() { return id; }
    std::string getName() { return name; }
    void *getHostPointer() { return h_ptr; }
    void *getDevicePointer() { return d_ptr; }
    void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; }
    void setSize(size_t size_) { size = size_; }
    size_t getSize() { return size; };
    bool isStatic() { return is_static; }
};

class Property {
protected:
    property_t id;
    std::string name;
    void *h_ptr, *d_ptr;
    PropertyType type;
    layout_t layout;
    size_t sx, sy;

public:
    /*Property(property_t id_, std::string name_, void *h_ptr_, void *d_ptr_, PropertyType type_, layout_t layout, size_t sx_) :
        id(id_),
        name(name_),
        h_ptr(h_ptr_),
        d_ptr(d_ptr_),
        type(type_),
        layout(Invalid),
        sx(sx_), sy(1) {

        PAIRS_ASSERT(type != Prop_Invalid && layout_ != Invalid && sx_ > 0);
    }*/

    Property(property_t id_, std::string name_, void *h_ptr_, void *d_ptr_, PropertyType type_, layout_t layout_, size_t sx_, size_t sy_=1) :
        id(id_),
        name(name_),
        h_ptr(h_ptr_),
        d_ptr(d_ptr_),
        type(type_),
        layout(layout_),
        sx(sx_), sy(sy_) {

        PAIRS_ASSERT(type != Prop_Invalid && layout_ != Invalid && sx_ > 0 && sy_ > 0);
    }

    property_t getId() { return id; }
    std::string getName() { return name; }
    void *getHostPointer() { return h_ptr; }
    void *getDevicePointer() { return d_ptr; }
    void setPointers(void *h_ptr_, void *d_ptr_) { h_ptr = h_ptr_, d_ptr = d_ptr_; }
    void setSizes(size_t sx_, size_t sy_) { sx = sx_, sy = sy_; }
    size_t getTotalSize() { return sx * sy * getElemSize(); };
    PropertyType getType() { return type; }
    layout_t getLayout() { return layout; }
    size_t getElemSize() {
        return  (type == Prop_Integer) ? sizeof(int) :
                (type == Prop_Float) ? sizeof(double) :
                (type == Prop_Vector) ? sizeof(double) : 0;
    }
};

class IntProperty : public Property {
public:
    inline int &operator()(int i) { return static_cast<int *>(h_ptr)[i]; }
};

class FloatProperty : public Property {
public:
    inline double &operator()(int i) { return static_cast<double *>(h_ptr)[i]; }
};

class VectorProperty : public Property {
public:
    double &operator()(int i, int j) {
        PAIRS_ASSERT(type != Prop_Invalid && layout != Invalid && sx > 0 && sy > 0);
        double *dptr = static_cast<double *>(h_ptr);
        if(layout == AoS) { return dptr[i * sy + j]; }
        if(layout == SoA) { return dptr[j * sx + i]; }
        PAIRS_ERROR("VectorProperty::operator[]: Invalid data layout!");
        return dptr[0];
    }
};

class DeviceFlags {
private:
    unsigned long long int *hflags;
    unsigned long long int *dflags;
    int narrays;
    int nflags;
    static const int narrays_per_flag = 64;
public:
    DeviceFlags(int narrays_) : narrays(narrays_) {
        nflags = std::ceil((double) narrays_ / (double) narrays_per_flag);
        hflags = new unsigned long long int[nflags];
        dflags = new unsigned long long int[nflags];

        for(int i = 0; i < nflags; ++i) {
            hflags[i] = 0xfffffffffffffffful;
            dflags[i] = 0x0;
        }
    }

    inline bool isHostFlagSet(int array_id) {
        int flag_index = array_id / narrays_per_flag;
        unsigned long long int bit = array_id % narrays_per_flag;
        return (hflags[flag_index] & (1 << bit)) != 0;
    }

    inline void setHostFlag(int array_id) {
        int flag_index = array_id / narrays_per_flag;
        unsigned long long int bit = array_id % narrays_per_flag;
        hflags[flag_index] |= 1 << bit;
    }

    inline void clearHostFlag(int array_id) {
        int flag_index = array_id / narrays_per_flag;
        unsigned long long int bit = array_id % narrays_per_flag;
        hflags[flag_index] &= ~(1 << bit);
    }

    inline bool isDeviceFlagSet(int array_id) {
        int flag_index = array_id / narrays_per_flag;
        unsigned long long int bit = array_id % narrays_per_flag;
        return (dflags[flag_index] & (1 << bit)) != 0;
    }

    inline void setDeviceFlag(int array_id) {
        int flag_index = array_id / narrays_per_flag;
        unsigned long long int bit = array_id % narrays_per_flag;
        dflags[flag_index] |= 1 << bit;
    }

    inline void clearDeviceFlag(int array_id) {
        int flag_index = array_id / narrays_per_flag;
        unsigned long long int bit = array_id % narrays_per_flag;
        dflags[flag_index] &= ~(1 << bit);
    }

    void printFlags() {
        fprintf(stderr, "hflags = ");
        for(int i = 0; i < nflags; i++) {
            for(int b = 63; b >= 0; b--) {
                if(hflags[i] & (1 << b)) {
                    fprintf(stderr, "1");
                } else {
                    fprintf(stderr, "0");
                }
            }
        }

        fprintf(stderr, "\n");
        fprintf(stderr, "dflags = ");
        for(int i = 0; i < nflags; i++) {
            for(int b = 63; b >= 0; b--) {
                if(dflags[i] & (1 << b)) {
                    fprintf(stderr, "1");
                } else {
                    fprintf(stderr, "0");
                }
            }
        }

        fprintf(stderr, "\n");
    }

    ~DeviceFlags() {
        delete[] hflags;
        delete[] dflags;
    }
};

template<int ndims>
class PairsSimulation {
private:
    Regular6DStencil<ndims> *dom_part;
    //DomainPartitioner<ndims> *dom_part;
    std::vector<Property> properties;
    std::vector<Array> arrays;
    DeviceFlags *prop_flags, *array_flags;
    DomainPartitioning dom_part_type;
    int nprops, narrays;
public:
    PairsSimulation(int nprops_, int narrays_, DomainPartitioning dom_part_type_) {
        dom_part_type = dom_part_type_;
        prop_flags = new DeviceFlags(nprops_);
        array_flags = new DeviceFlags(narrays_);
    }

    ~PairsSimulation() {
        dom_part->finalize();
        delete prop_flags;
        delete array_flags;
    }

    void initDomain(int *argc, char ***argv, real_t xmin, real_t xmax, real_t ymin, real_t ymax, real_t zmin, real_t zmax) {
        if(dom_part_type == DimRanges) {
            dom_part = new Regular6DStencil<ndims>(xmin, xmax, ymin, ymax, zmin, zmax);
        } else {
            PAIRS_EXCEPTION("Domain partitioning type not implemented!\n");
        }

        dom_part->initialize(argc, argv);
    }

    Regular6DStencil<ndims> *getDomainPartitioner() { return dom_part; }

    template<typename T_ptr>
    void addArray(array_t id, std::string name, T_ptr **h_ptr, std::nullptr_t, size_t size) {
        PAIRS_ASSERT(size > 0);

        *h_ptr = (T_ptr *) malloc(size);
        PAIRS_ASSERT(*h_ptr != nullptr);
        addArray(Array(id, name, *h_ptr, nullptr, size, false));
    }

    template<typename T_ptr>
    void addArray(array_t id, std::string name, T_ptr **h_ptr, T_ptr **d_ptr, size_t size) {
        PAIRS_ASSERT(size > 0);

        *h_ptr = (T_ptr *) malloc(size);
        *d_ptr = (T_ptr *) pairs::device_alloc(size);
        PAIRS_ASSERT(*h_ptr != nullptr && *d_ptr != nullptr);
        addArray(Array(id, name, *h_ptr, *d_ptr, size, false));
    }

    template<typename T_ptr>
    void addStaticArray(array_t id, std::string name, T_ptr *h_ptr, std::nullptr_t, size_t size) {
        addArray(Array(id, name, h_ptr, nullptr, size, true));
    }

    template<typename T_ptr>
    void addStaticArray(array_t id, std::string name, T_ptr *h_ptr, T_ptr *d_ptr, size_t size) {
        addArray(Array(id, name, h_ptr, d_ptr, size, true));
    }

    void addArray(Array array) {
        int id = array.getId();
        auto a = std::find_if(arrays.begin(), arrays.end(), [id](Array a) { return a.getId() == id; });
        PAIRS_ASSERT(a == std::end(arrays));
        arrays.push_back(array);
    }

    template<typename T_ptr>
    void reallocArray(array_t id, T_ptr **h_ptr, std::nullptr_t, size_t size) {
        // This should be a pointer (and not a reference) in order to be modified
        auto a = std::find_if(arrays.begin(), arrays.end(), [id](Array a) { return a.getId() == id; });
        PAIRS_ASSERT(a != std::end(arrays));
        PAIRS_ASSERT(size > 0);

        *h_ptr = (T_ptr *) realloc(*h_ptr, size);
        PAIRS_ASSERT(*h_ptr != nullptr);

        a->setPointers(*h_ptr, nullptr);
        a->setSize(size);
    }

    template<typename T_ptr>
    void reallocArray(array_t id, T_ptr **h_ptr, T_ptr **d_ptr, size_t size) {
        // This should be a pointer (and not a reference) in order to be modified
        auto a = std::find_if(arrays.begin(), arrays.end(), [id](Array a) { return a.getId() == id; });
        PAIRS_ASSERT(a != std::end(arrays));
        PAIRS_ASSERT(size > 0);

        void *new_h_ptr = realloc(*h_ptr, size);
        void *new_d_ptr = pairs::device_realloc(*d_ptr, size);
        PAIRS_ASSERT(new_h_ptr != nullptr && new_d_ptr != nullptr);

        a->setPointers(new_h_ptr, new_d_ptr);
        a->setSize(size);

        *h_ptr = (T_ptr *) new_h_ptr;
        *d_ptr = (T_ptr *) new_d_ptr;
        if(array_flags->isDeviceFlagSet(id)) {
            copyArrayToDevice(id);
        }
    }

    Array &getArray(array_t id) {
        auto a = std::find_if(arrays.begin(), arrays.end(), [id](Array a) { return a.getId() == id; });
        PAIRS_ASSERT(a != std::end(arrays));
        return *a;
    }

    Array &getArrayByName(std::string name) {
        auto a = std::find_if(arrays.begin(), arrays.end(), [name](Array a) { return a.getName() == name; });
        PAIRS_ASSERT(a != std::end(arrays));
        return *a;
    }

    template<typename T_ptr>
    void addProperty(property_t id, std::string name, T_ptr **h_ptr, std::nullptr_t, PropertyType type, layout_t layout, size_t sx, size_t sy=1) {
        size_t size = sx * sy * sizeof(T_ptr);
        PAIRS_ASSERT(size > 0);

        *h_ptr = (T_ptr *) malloc(size);
        PAIRS_ASSERT(*h_ptr != nullptr);
        addProperty(Property(id, name, *h_ptr, nullptr, type, layout, sx, sy));
    }

    template<typename T_ptr>
    void addProperty(property_t id, std::string name, T_ptr **h_ptr, T_ptr **d_ptr, PropertyType type, layout_t layout, size_t sx, size_t sy=1) {
        size_t size = sx * sy * sizeof(T_ptr);
        PAIRS_ASSERT(size > 0);

        *h_ptr = (T_ptr *) malloc(size);
        *d_ptr = (T_ptr *) pairs::device_alloc(size);
        PAIRS_ASSERT(*h_ptr != nullptr && *d_ptr != nullptr);
        addProperty(Property(id, name, *h_ptr, *d_ptr, type, layout, sx, sy));
    }

    void addProperty(Property prop) {
        int id = prop.getId();
        auto p = std::find_if(properties.begin(), properties.end(), [id](Property p) { return p.getId() == id; });
        PAIRS_ASSERT(p == std::end(properties));
        properties.push_back(prop);
    }

    template<typename T_ptr>
    void reallocProperty(property_t id, T_ptr **h_ptr, std::nullptr_t, size_t sx = 1, size_t sy = 1) {
        // This should be a pointer (and not a reference) in order to be modified
        auto p = std::find_if(properties.begin(), properties.end(), [id](Property p) { return p.getId() == id; });
        PAIRS_ASSERT(p != std::end(properties));

        size_t size = sx * sy * p->getElemSize();
        PAIRS_ASSERT(size > 0);

        *h_ptr = (T_ptr *) realloc(*h_ptr, size);
        PAIRS_ASSERT(*h_ptr != nullptr);

        p->setPointers(*h_ptr, nullptr);
        p->setSizes(sx, sy);
    }

    template<typename T_ptr>
    void reallocProperty(property_t id, T_ptr **h_ptr, T_ptr **d_ptr, size_t sx = 1, size_t sy = 1) {
        // This should be a pointer (and not a reference) in order to be modified
        auto p = std::find_if(properties.begin(), properties.end(), [id](Property p) { return p.getId() == id; });
        PAIRS_ASSERT(p != std::end(properties));

        size_t size = sx * sy * p->getElemSize();
        PAIRS_ASSERT(size > 0);

        void *new_h_ptr = realloc(*h_ptr, size);
        void *new_d_ptr = pairs::device_realloc(*d_ptr, size);
        PAIRS_ASSERT(new_h_ptr != nullptr && new_d_ptr != nullptr);

        p->setPointers(new_h_ptr, new_d_ptr);
        p->setSizes(sx, sy);

        *h_ptr = (T_ptr *) new_h_ptr;
        *d_ptr = (T_ptr *) new_d_ptr;
        if(prop_flags->isDeviceFlagSet(id)) {
            copyPropertyToDevice(id);
        }
    }

    Property &getProperty(property_t id) {
        auto p = std::find_if(properties.begin(), properties.end(), [id](Property p) { return p.getId() == id; });
        PAIRS_ASSERT(p != std::end(properties));
        return *p;
    }

    Property &getPropertyByName(std::string name) {
        auto p = std::find_if(properties.begin(), properties.end(), [name](Property p) { return p.getName() == name; });
        PAIRS_ASSERT(p != std::end(properties));
        return *p;
    }

    inline IntProperty &getAsIntegerProperty(Property &prop) { return static_cast<IntProperty&>(prop); }
    inline FloatProperty &getAsFloatProperty(Property &prop) { return static_cast<FloatProperty&>(prop); }
    inline VectorProperty &getAsVectorProperty(Property &prop) { return static_cast<VectorProperty&>(prop); }
    inline IntProperty &getIntegerProperty(property_t property) { return static_cast<IntProperty&>(getProperty(property)); }
    inline FloatProperty &getFloatProperty(property_t property) { return static_cast<FloatProperty&>(getProperty(property)); }
    inline VectorProperty &getVectorProperty(property_t property) { return static_cast<VectorProperty&>(getProperty(property)); }

    void setArrayDeviceFlag(array_t id) { setArrayDeviceFlag(getArray(id)); }
    void setArrayDeviceFlag(Array &array) { array_flags->setDeviceFlag(array.getId()); }
    void clearArrayDeviceFlag(array_t id) { clearArrayDeviceFlag(getArray(id)); }
    void clearArrayDeviceFlag(Array &array) { array_flags->clearDeviceFlag(array.getId()); }
    void copyArrayToDevice(array_t id) { copyArrayToDevice(getArray(id)); }
    void copyArrayToDevice(Array &array) {
        int array_id = array.getId();
        if(!array_flags->isDeviceFlagSet(array_id)) {
            if(array.isStatic()) {
                PAIRS_DEBUG("Copying static array %s to device\n", array.getName().c_str());
                pairs::copy_static_symbol_to_device(array.getHostPointer(), array.getDevicePointer(), array.getSize());
            } else {
                PAIRS_DEBUG("Copying array %s to device\n", array.getName().c_str());
                pairs::copy_to_device(array.getHostPointer(), array.getDevicePointer(), array.getSize());
            }
        }
    }

    void setArrayHostFlag(array_t id) { setArrayHostFlag(getArray(id)); }
    void setArrayHostFlag(Array &array) { array_flags->setHostFlag(array.getId()); }
    void clearArrayHostFlag(array_t id) { clearArrayHostFlag(getArray(id)); }
    void clearArrayHostFlag(Array &array) { array_flags->clearHostFlag(array.getId()); }
    void copyArrayToHost(array_t id) { copyArrayToHost(getArray(id)); }
    void copyArrayToHost(Array &array) {
        int array_id = array.getId();
        if(!array_flags->isHostFlagSet(array_id)) {
            if(array.isStatic()) {
                PAIRS_DEBUG("Copying static array %s to host\n", array.getName().c_str());
                pairs::copy_static_symbol_to_host(array.getDevicePointer(), array.getHostPointer(), array.getSize());
            } else {
                PAIRS_DEBUG("Copying array %s to host\n", array.getName().c_str());
                pairs::copy_to_host(array.getDevicePointer(), array.getHostPointer(), array.getSize());
            }
        }
    }

    void setPropertyDeviceFlag(property_t id) { setPropertyDeviceFlag(getProperty(id)); }
    void setPropertyDeviceFlag(Property &prop) { prop_flags->setDeviceFlag(prop.getId()); }
    void clearPropertyDeviceFlag(property_t id) { clearPropertyDeviceFlag(getProperty(id)); }
    void clearPropertyDeviceFlag(Property &prop) { prop_flags->clearDeviceFlag(prop.getId()); }
    void copyPropertyToDevice(property_t id) { copyPropertyToDevice(getProperty(id)); }
    void copyPropertyToDevice(Property &prop) {
        if(!prop_flags->isDeviceFlagSet(prop.getId())) {
            PAIRS_DEBUG("Copying property %s to device\n", prop.getName().c_str());
            pairs::copy_to_device(prop.getHostPointer(), prop.getDevicePointer(), prop.getTotalSize());
        }
    }

    void setPropertyHostFlag(property_t id) { setPropertyHostFlag(getProperty(id)); }
    void setPropertyHostFlag(Property &prop) { prop_flags->setHostFlag(prop.getId()); }
    void clearPropertyHostFlag(property_t id) { clearPropertyHostFlag(getProperty(id)); }
    void clearPropertyHostFlag(Property &prop) { prop_flags->clearHostFlag(prop.getId()); }
    void copyPropertyToHost(property_t id) { copyPropertyToHost(getProperty(id)); }
    void copyPropertyToHost(Property &prop) {
        if(!prop_flags->isHostFlagSet(prop.getId())) {
            PAIRS_DEBUG("Copying property %s to host\n", prop.getName().c_str());
            pairs::copy_to_host(prop.getDevicePointer(), prop.getHostPointer(), prop.getTotalSize());
        }
    }

    void communicateSizes(int dim, const int *send_sizes, int *recv_sizes) {
        this->getDomainPartitioner()->communicateSizes(dim, send_sizes, recv_sizes);
        PAIRS_DEBUG("send_sizes=[%d, %d], recv_sizes=[%d, %d]\n", send_sizes[dim * 2 + 0], recv_sizes[dim * 2 + 1], recv_sizes[dim * 2 + 0], recv_sizes[dim * 2 + 1]);
    }

    void communicateData(
        int dim, int elem_size,
        const real_t *send_buf, const int *send_offsets, const int *nsend,
        real_t *recv_buf, const int *recv_offsets, const int *nrecv) {

        this->getDomainPartitioner()->communicateData(dim, elem_size, send_buf, send_offsets, nsend, recv_buf, recv_offsets, nrecv);

        const int elems_to_print = 5;

        // Send buffer debug
        for(int i = 0; i < 2; i++) {
            int nsnd = nsend[dim * 2 + i];

            PAIRS_DEBUG("send_buf=[");
            for(int j = 0; j < MIN(elems_to_print, nsnd); j++) {
                for(int k = 0; k < elem_size; k++) {
                    PAIRS_DEBUG("%f,", send_buf[(send_offsets[dim * 2 + i] + j) * elem_size + k]);
                }
            }

            if(elems_to_print * 2 < nsnd) {
                PAIRS_DEBUG("\b ... ");
            }

            for(int j = MAX(elems_to_print, nsnd - elems_to_print); j < nsnd; j++) {
                for(int k = 0; k < elem_size; k++) {
                    PAIRS_DEBUG("%f,", send_buf[(send_offsets[dim * 2 + i] + j) * elem_size + k]);
                }
            }

            PAIRS_DEBUG("\b]\n");
        }

        // Receive buffer debug
        for(int i = 0; i < 2; i++) {
            int nrec = nrecv[dim * 2 + i];

            PAIRS_DEBUG("recv_buf=[");
            for(int j = 0; j < MIN(elems_to_print, nrec); j++) {
                for(int k = 0; k < elem_size; k++) {
                    PAIRS_DEBUG("%f,", recv_buf[(recv_offsets[dim * 2 + i] + j) * elem_size + k]);
                }
            }

            if(elems_to_print * 2 < nrec) {
                PAIRS_DEBUG("\b ... ");
            }

            for(int j = MAX(elems_to_print, nrec - elems_to_print); j < nrec; j++) {
                for(int k = 0; k < elem_size; k++) {
                    PAIRS_DEBUG("%f,", recv_buf[(recv_offsets[dim * 2 + i] + j) * elem_size + k]);
                }
            }

            PAIRS_DEBUG("\b]\n");
        }
    }

    void fillCommunicationArrays(int neighbor_ranks[], int pbc[], real_t subdom[]) {
        this->getDomainPartitioner()->fillArrays(neighbor_ranks, pbc, subdom);
    }
};

}