Learn » Examples » Example Jacobi 2D Program

The full code for this example can be found here: examples/collection/jacobi2d_vt.cc.

//
// This code applies a few steps of the Jacobi iteration to
// the linear system  A x = 0
// where A is a banded symmetric positive definite matrix.
// The initial guess for x is a made-up non-zero vector.
// The exact solution is the vector 0.
//
// The matrix A is square and invertible.
// The number of rows is ((number of objects) * (number of rows per object))
//
// Such a matrix A is obtained when using 2nd-order finite difference
// for discretizing
//
// -d^2 u / dx^2 -d^2 u / dy^2 = f   on  [0, 1] x [0, 1]
//
// with homogeneous Dirichlet condition
//
// u = 0 on the boundary of [0, 1] x [0, 1]
//
// using a uniform grid with grid size
//
// 1 / ((number of objects) * (number of rows per object) + 1)
//


static constexpr std::size_t const default_nrow_object = 8;
static constexpr std::size_t const default_num_objs = 4;
static constexpr double const default_tol = 1.0e-02;

struct NodeObj {
  bool is_finished_ = false;
  void workFinishedHandler() { is_finished_ = true; }
  bool isWorkFinished() { return is_finished_; }
};
using NodeObjProxy = vt::objgroup::proxy::Proxy<NodeObj>;

struct LinearPb2DJacobi : vt::Collection<LinearPb2DJacobi,vt::Index2D> {

private:

  std::vector<double> tcur_, told_;
  std::vector<double> rhs_;
  size_t iter_ = 0;
  size_t msgReceived_ = 0, totalReceive_ = 0;
  size_t numObjsX_ = 1, numObjsY_ = 1;
  size_t numRowsPerObject_ = default_nrow_object;
  size_t maxIter_ = 5;
  NodeObjProxy objProxy_;

public:

  LinearPb2DJacobi()
    : tcur_(), told_(), rhs_(), iter_(0),
      msgReceived_(0), totalReceive_(0),
      numObjsX_(1), numObjsY_(1),
      numRowsPerObject_(default_nrow_object),
      maxIter_(5)
  { }


  struct BlankMsg : vt::CollectionMessage<LinearPb2DJacobi> { };

  struct LPMsg : vt::CollectionMessage<LinearPb2DJacobi> {

    size_t numXObjs = 0;
    size_t numYObjs = 0;
    size_t numIter = 0;
    NodeObjProxy objProxy;

    LPMsg() = default;

    LPMsg(const size_t nx, const size_t ny, const size_t nref, NodeObjProxy proxy)
      : numXObjs(nx), numYObjs(ny), numIter(nref), objProxy(proxy)
    { }

  };


  void checkCompleteCB(double const normRes) {
    //
    // Only one object for the reduction will visit
    // this function
    //

    auto const iter_max_reached = iter_ > maxIter_;
    auto const norm_res_done = normRes < default_tol;

    if (iter_max_reached or norm_res_done) {
      auto const to_print = iter_max_reached ?
        "\n Maximum Number of Iterations Reached. \n\n" :
        fmt::format("\n Max-Norm Residual Reduced by {} \n\n", default_tol);

      fmt::print(to_print);

      // Notify all nodes that computation is finished
      objProxy_.broadcast<&NodeObj::workFinishedHandler>();
    } else {
      fmt::print(" ## ITER {} >> Residual Norm = {} \n", iter_, normRes);
    }
  }

  void doIteration() {

    //
    //--- Copy ghost values
    //

    size_t ldx = numRowsPerObject_ + 2;
    size_t ldy = numRowsPerObject_ + 2;

    for (size_t jx = 0; jx < ldx; ++jx)
      tcur_[jx] = told_[jx];

    for (size_t jx = 0; jx < ldx; ++jx)
      tcur_[jx + (ldy-1) * ldx] = told_[jx + (ldy-1) * ldx];

    for (size_t jy = 0; jy < ldy; ++jy)
      tcur_[jy * ldx] = told_[jy * ldx];

    for (size_t jy = 0; jy < ldy; ++jy)
      tcur_[ldx-1 + jy * ldx] = told_[ldx-1 + jy * ldx];

    //
    //--- Update my row values
    //

    for (size_t iy = 1; iy <= numRowsPerObject_; ++iy) {
      for (size_t ix = 1; ix <= numRowsPerObject_; ++ix) {
        //
        //---- Jacobi iteration step for
        //---- A banded matrix for the 5-point stencil
        //---- [ 0.0  -1.0   0.0]
        //---- [-1.0   4.0  -1.0]
        //---- [ 0.0  -1.0   0.0]
        //---- rhs_ right hand side vector
        //
        size_t node = ix + iy * ldx;
        tcur_[node] = 0.25 * (rhs_[node]
                              + told_[node - 1] + told_[node + 1]
                              + told_[node - ldx] + told_[node + ldx]);
      }
    }

    iter_ += 1;

    std::copy(tcur_.begin(), tcur_.end(), told_.begin());

    //
    // Compute the maximum entries among the rows on this object
    // We do not take into account the "ghost" entries
    // as they may be "out of date".
    //

    double maxNorm = 0.0;

    for (size_t iy = 1; iy <= numRowsPerObject_; ++iy) {
      for (size_t ix = 1; ix <= numRowsPerObject_; ++ix) {
        size_t node = ix + iy * ldx;
        double val = tcur_[node];
        maxNorm = (maxNorm > std::fabs(val)) ? maxNorm : std::fabs(val);
      }
    }

    auto proxy = this->getCollectionProxy();
    proxy.reduce<&LinearPb2DJacobi::checkCompleteCB, vt::collective::MaxOp>(
      proxy(0,0), maxNorm
    );
  }

  struct VecMsg : vt::CollectionMessage<LinearPb2DJacobi> {
    using MessageParentType = vt::CollectionMessage<LinearPb2DJacobi>;
    vt_msg_serialize_required(); // stl vector

    VecMsg() = default;
    VecMsg(IndexType const& in_index, const std::vector<double> &ref) :
      vt::CollectionMessage<LinearPb2DJacobi>(),
      from_index(in_index), val(ref)
    { }

    template <typename Serializer>
    void serialize(Serializer& s) {
      MessageParentType::serialize(s);
      s | from_index;
      s | val;
    }

    IndexType from_index;
    std::vector<double> val;
  };

  void exchange(VecMsg *msg) {

    // Receive and treat the message from a neighboring object.

    if (this->getIndex().x() > msg->from_index.x()) {
      const size_t ldx = numRowsPerObject_ + 2;
      for (size_t jy = 0; jy < msg->val.size(); ++jy) {
        this->told_[jy*ldx] = msg->val[jy];
      }
      msgReceived_ += 1;
    }
    else if (this->getIndex().x() < msg->from_index.x()) {
      const size_t ldx = numRowsPerObject_ + 2;
      for (size_t jy = 0; jy < msg->val.size(); ++jy) {
        this->told_[numRowsPerObject_ + 1 + jy*ldx] = msg->val[jy];
      }
      msgReceived_ += 1;
    }
    else if (this->getIndex().y() > msg->from_index.y()) {
      std::copy(msg->val.begin(), msg->val.end(), this->told_.begin());
      msgReceived_ += 1;
    }
    else if (this->getIndex().y() < msg->from_index.y()) {
      std::copy(msg->val.begin(), msg->val.end(),
                &this->told_[(numRowsPerObject_ + 1)*(numRowsPerObject_ + 2)]);
      msgReceived_ += 1;
    }

    if (msgReceived_ == totalReceive_) {
      msgReceived_ = 0;
      doIteration();
    }

  }

  void doIter([[maybe_unused]] BlankMsg *msg) {

    //
    // Treat the particular case of 1 object
    // where no communication is needed.
    // Without this treatment, the code would not iterate.
    //
    if (numObjsX_*numObjsY_ <= 1) {
      doIteration();
      return;
    }
    //---------------------------------------

    //
    // Routine to send information to a neighboring object
    //

    auto proxy = this->getCollectionProxy();
    auto idx = this->getIndex();
    auto const x = idx.x();
    auto const y = idx.y();

    if (x > 0) {
      std::vector<double> tcopy(numRowsPerObject_ + 2, 0.0);
      for (size_t jy = 1; jy <= numRowsPerObject_; ++jy)
        tcopy[jy] = told_[1 + jy * (numRowsPerObject_ + 2)];
      proxy(x-1, y).send<VecMsg, &LinearPb2DJacobi::exchange>(idx, tcopy);
    }

    if (y > 0) {
      std::vector<double> tcopy(numRowsPerObject_ + 2, 0.0);
      for (size_t jx = 1; jx <= numRowsPerObject_; ++jx)
        tcopy[jx] = told_[jx + (numRowsPerObject_ + 2)];
      proxy(x, y-1).send<VecMsg, &LinearPb2DJacobi::exchange>(idx, tcopy);
    }

    if (size_t(x) < numObjsX_ - 1) {
      std::vector<double> tcopy(numRowsPerObject_ + 2, 0.0);
      for (size_t jy = 1; jy <= numRowsPerObject_; ++jy) {
        tcopy[jy] = told_[numRowsPerObject_ +
                          jy * (numRowsPerObject_ + 2)];
      }
      proxy(x+1, y).send<VecMsg, &LinearPb2DJacobi::exchange>(idx, tcopy);
    }

    if (size_t(y) < numObjsY_ - 1) {
      std::vector<double> tcopy(numRowsPerObject_ + 2, 0.0);
      for (size_t jx = 1; jx <= numRowsPerObject_; ++jx)
        tcopy[jx] = told_[jx + numRowsPerObject_ * (numRowsPerObject_ + 2)];
      proxy(x, y+1).send<VecMsg, &LinearPb2DJacobi::exchange>(idx, tcopy);
    }

  }


  void init() {

    //--- Each object will work with (numRowsPerObject_ + 2) unknowns
    //--- or (numRowsPerObject_ + 2) rows of the matrix
    size_t ldx = numRowsPerObject_ + 2, ldy = ldx;

    size_t vecSize = ldx * ldy;
    tcur_.assign(vecSize, 0.0);
    told_.assign(vecSize, 0.0);
    rhs_.assign(vecSize, 0.0);

    //
    // Set the initial vector to the values of
    // a "high-frequency" function
    //

    double hx = 1.0 / (numRowsPerObject_ * numObjsX_ + 1.0);
    double hy = 1.0 / (numRowsPerObject_ * numObjsY_ + 1.0);

    size_t maxNObjs = (size_t) std::max(numObjsX_, numObjsY_);
    int nf = 3 * int(numRowsPerObject_ * maxNObjs + 1) / 4;

    auto idx = this->getIndex();

    for (size_t iy = 0; iy < ldy; ++iy) {
      for (size_t ix = 0; ix < ldx; ++ix) {
        double x0 = ( numRowsPerObject_ * idx.x() + ix) * hx;
        double y0 = ( numRowsPerObject_ * idx.y() + iy) * hy;
        size_t node = ix + iy * ldx;
        tcur_[node] = sin(nf * M_PI * (x0 * x0 + y0 * y0));
      }
    }

    totalReceive_ = 4;

    //
    //--- The unknowns correspond to the interior nodes
    //--- of a regular orthogonal grid on [0, 1] x [0, 1]
    //--- The total number of grid points in X-direction is
    //--- (numRowsPerObject_ * numObjsX_) + 2
    //--- The total number of grid points in Y-direction is
    //--- (numRowsPerObject_ * numObjsY_) + 2
    //
    if (idx.x() == 0) {
      for (size_t jy = 0; jy < ldy; ++jy)
        tcur_[jy * ldy] = 0.0;
      totalReceive_ -= 1;
    }

    if (idx.y() == 0) {
      for (size_t jx = 0; jx < ldx; ++jx)
        tcur_[jx] = 0.0;
      totalReceive_ -= 1;
    }

    if (numObjsX_ == size_t(idx.x()) + 1) {
      for (size_t jy = 0; jy < ldy; ++jy)
        tcur_[jy * ldy + (ldx - 1)] = 0.0;
      totalReceive_ -= 1;
    }

    if (numObjsY_ == size_t(idx.y()) + 1) {
      for (size_t jx = 0; jx < ldx; ++jx)
        tcur_[jx + (ldx - 1)*ldy] = 0.0;
      totalReceive_ -= 1;
    }

    std::copy(tcur_.begin(), tcur_.end(), told_.begin());

  }


  void init(LPMsg* msg) {

    numObjsX_ = msg->numXObjs;
    numObjsY_ = msg->numYObjs;
    maxIter_ = msg->numIter;
    objProxy_ = msg->objProxy;

    // Initialize the starting vector
    init();
  }

};

bool isWorkDone( vt::objgroup::proxy::Proxy<NodeObj> const& proxy){
  auto const this_node = vt::theContext()->getNode();
  return proxy[this_node].invoke<&NodeObj::isWorkFinished>();
}

int main(int argc, char** argv) {

  size_t numX_objs = default_num_objs;
  size_t numY_objs = default_num_objs;
  size_t maxIter = 10;

  std::string name(argv[0]);

  vt::initialize(argc, argv);

  vt::NodeType this_node = vt::theContext()->getNode();

  if (argc == 1) {
    if (this_node == 0) {
      fmt::print(
        stderr, "{}: using default arguments since none provided\n", name
      );
    }
  } else {
    if (argc == 3) {
      numX_objs = (size_t) strtol(argv[1], nullptr, 10);
      numY_objs = (size_t) strtol(argv[2], nullptr, 10);
    }
    else if (argc == 4) {
      numX_objs = (size_t) strtol(argv[1], nullptr, 10);
      numY_objs = (size_t) strtol(argv[2], nullptr, 10);
      maxIter = (size_t) strtol(argv[3], nullptr, 10);
    }
    else {
      fmt::print(
        stderr, "usage: {} <num-objects-X-direction> <num-objects-Y-direction> <maxiter>\n",
        name
      );
      return 1;
    }
  }

  /* --- Print information about the simulation */

  if (this_node == 0) {
    fmt::print(
      stdout, "\n - Solve the linear system for the Laplacian with homogeneous Dirichlet"
      " on [0, 1] x [0, 1]\n"
    );
    fmt::print(stdout, " - Second-order centered finite difference\n");
    fmt::print(
      stdout, " - Uniform grid with ({} x {} = {}) points in the x-direction and "
      " ({} x {} = {}) points in the y-direction\n",
      numX_objs, default_nrow_object, numX_objs * default_nrow_object,
      numY_objs, default_nrow_object, numY_objs * default_nrow_object
    );
    fmt::print(stdout, " - Maximum number of iterations {}\n", maxIter);
    fmt::print(stdout, " - Convergence tolerance {}\n", default_tol);
    fmt::print(stdout, "\n");
  }

  // Object group of all nodes that take part in computation
  // Used to determine whether the computation is finished
  auto grp_proxy = vt::theObjGroup()->makeCollective<NodeObj>("examples_jacobi2d");

  // Create the decomposition into objects
  using BaseIndexType = typename vt::Index2D::DenseIndexType;
  auto range = vt::Index2D(
    static_cast<BaseIndexType>(numX_objs),
    static_cast<BaseIndexType>(numY_objs)
  );

  auto col_proxy = vt::makeCollection<LinearPb2DJacobi>("examples_jacobi2d")
    .bounds(range)
    .bulkInsert()
    .wait();

  vt::runInEpochCollective([col_proxy, grp_proxy, numX_objs, numY_objs, maxIter] {
    col_proxy.broadcastCollective<LinearPb2DJacobi::LPMsg, &LinearPb2DJacobi::init>(
      numX_objs, numY_objs, maxIter, grp_proxy
    );
  });

  while (!isWorkDone(grp_proxy)) {
    vt::runInEpochCollective([col_proxy] {
      col_proxy.broadcastCollective<
        LinearPb2DJacobi::BlankMsg, &LinearPb2DJacobi::doIter>();
    });

    vt::thePhase()->nextPhaseCollective();
  }

  vt::finalize();

  return 0;

}