vt::vrt::collection::lb::TemperedLB struct

Base classes

struct BaseLB

Derived classes

struct TemperedWMin

Public types

using LoadMsgAsync = balance::LoadMsgAsync
using LoadMsgSync = balance::LoadMsg
using NodeSetType = std::vector<NodeType>
using ObjsType = std::unordered_map<ObjIDType, LoadType>
using ReduceMsgType = vt::collective::ReduceNoneMsg
using QuantityType = std::map<lb::StatisticQuantity, double>
using StatisticMapType = std::unordered_map<lb::Statistic, QuantityType>
using EdgeMapType = std::unordered_map<elm::ElementIDStruct, std::vector<std::tuple<elm::ElementIDStruct, double>>>

Public static functions

static auto getInputKeysWithHelp() -> std::unordered_map<std::string, std::string>
static auto orderObjects(ObjectOrderEnum obj_ordering, std::unordered_map<ObjIDType, LoadType> cur_objs, LoadType this_new_load, LoadType target_max_load) -> std::vector<ObjIDType>

Constructors, destructors, conversion operators

TemperedLB() defaulted
TemperedLB(TemperedLB const&) deleted
~TemperedLB() virtual

Public functions

void init(objgroup::proxy::Proxy<TemperedLB> in_proxy)
void runLB(LoadType total_load) override
void inputParams(balance::ConfigEntry* config) override

Protected types

struct LockedInfoMsg
The update message that comes from a rank when it is locked. This is a message instead of a normal handler so it can be buffered without copying it.

Protected functions

void doLBStages(LoadType start_imb)
void informAsync()
void informSync()
void originalTransfer()
void swapClusters()
void migrate()
void propagateRound(uint8_t k_cur_async, bool sync, EpochType epoch = no_epoch)
void propagateIncomingAsync(LoadMsgAsync* msg)
void propagateIncomingSync(LoadMsgSync* msg)
auto isUnderloaded(LoadType load) const -> bool
auto isUnderloadedRelaxed(LoadType over, LoadType under) const -> bool
auto isOverloaded(LoadType load) const -> bool
auto createCMF(NodeSetType const& under) -> std::vector<double>
auto sampleFromCMF(NodeSetType const& under, std::vector<double> const& cmf) -> NodeType
auto makeUnderloaded() const -> std::vector<NodeType>
auto makeSufficientlyUnderloaded(LoadType load_to_accommodate) const -> std::vector<NodeType>
auto selectObject(LoadType size, ElementLoadType& load, std::set<ObjIDType> const& available) -> ElementLoadType::iterator
auto getModeledValue(const elm::ElementIDStruct& obj) -> LoadType virtual
void lazyMigrateObjsTo(EpochType epoch, NodeType node, ObjsType const& objs)
void inLazyMigrations(balance::LazyMigrationMsg* msg)
void loadStatsHandler(std::vector<balance::LoadData> const& vec)
void workStatsHandler(std::vector<balance::LoadData> const& vec)
void rejectionStatsHandler(int n_rejected, int n_transfers, int n_unhomed_blocks, int cycle_count)
void maxIterTime(double max_iter_time)
void remoteBlockCountHandler(int n_unhomed_blocks)
void thunkMigrations()
void setupDone()
void readClustersMemoryData()
Read the memory data from the user-defined json blocks into data structures.
auto computeMemoryUsage() -> BytesType
Compute the memory usage for current assignment.
auto getSharedBlocksHere() const -> std::set<SharedIDType>
Get the shared blocks that are located on this node with the current object assignment.
auto getRemoteBlockCountHere() const -> int
Get the number of shared blocks that are located on this node with the current object assignment but are not homed here.
void computeClusterSummary()
Compute the current cluster assignment summary for this rank.
auto makeClusterSummary(SharedIDType shared_id) -> ClusterInfo
Make cluster summary info.
void tryLock(NodeType requesting_node, double criterion_value)
Try to lock a rank.
void satisfyLockRequest()
Satisfy a lock request (if there is one).
void lockObtained(LockedInfoMsg* msg)
Inform a rank that a lock was obtained.
auto memoryTransferCriterion(double try_total_bytes, double src_bytes) -> bool
Compute memory component of tempered transfer criterion.
auto loadTransferCriterion(double before_w_src, double before_w_dst, double after_w_src, double after_w_dst) -> double
Compute load component of tempered transfer criterion.
auto computeWork(double load, double inter_comm_bytes, double intra_comm_bytes, double shared_comm_bytes) const -> double
Compute the amount of work based on the work model.
auto computeWorkBreakdown(NodeType node, std::unordered_map<ObjIDType, LoadType> const& objs, std::set<ObjIDType> const& exclude = {}, std::unordered_map<ObjIDType, LoadType> const& include = {}) -> WorkBreakdown
Compute work based on a set of objects.
auto computeWorkAfterClusterSwap(NodeType node, NodeInfo const& info, ClusterInfo const& to_remove, ClusterInfo const& to_add) -> double
void considerSwapsAfterLock(MsgSharedPtr<LockedInfoMsg> msg)
Consider possible swaps with all the up-to-date info from a rank.
void releaseLock()
Release a lock on a rank.
void giveCluster(NodeType from_rank, std::unordered_map<SharedIDType, BytesType> const& give_shared_blocks_size, std::unordered_map<ObjIDType, LoadType> const& give_objs, std::unordered_map<ObjIDType, SharedIDType> const& give_obj_shared_block, std::unordered_map<ObjIDType, BytesType> const& give_obj_working_bytes, SharedIDType take_cluster)
Give a cluster to a rank.
auto removeClusterToSend(SharedIDType shared_id, std::set<ObjIDType> objs = {}) -> auto
Remove a cluster to send. Does all the bookkeeping associated with removing the cluster.

Function documentation

BytesType vt::vrt::collection::lb::TemperedLB::computeMemoryUsage() protected

Compute the memory usage for current assignment.

Returns the total memory usage

std::set<SharedIDType> vt::vrt::collection::lb::TemperedLB::getSharedBlocksHere() const protected

Get the shared blocks that are located on this node with the current object assignment.

Returns the set of shared blocks here

int vt::vrt::collection::lb::TemperedLB::getRemoteBlockCountHere() const protected

Get the number of shared blocks that are located on this node with the current object assignment but are not homed here.

Returns the number of unhomed shared blocks here

ClusterInfo vt::vrt::collection::lb::TemperedLB::makeClusterSummary(SharedIDType shared_id) protected

Make cluster summary info.

Parameters
shared_id in the shared ID
Returns the info

void vt::vrt::collection::lb::TemperedLB::tryLock(NodeType requesting_node, double criterion_value) protected

Try to lock a rank.

Parameters
requesting_node in the requesting rank asking to lock
criterion_value in the criterion evaluation value to compare

void vt::vrt::collection::lb::TemperedLB::lockObtained(LockedInfoMsg* msg) protected

Inform a rank that a lock was obtained.

Parameters
msg in update message with all the info

bool vt::vrt::collection::lb::TemperedLB::memoryTransferCriterion(double try_total_bytes, double src_bytes) protected

Compute memory component of tempered transfer criterion.

Parameters
try_total_bytes in total memory bytes on target rank
src_bytes in memory bytes to be transferred from source rank

double vt::vrt::collection::lb::TemperedLB::loadTransferCriterion(double before_w_src, double before_w_dst, double after_w_src, double after_w_dst) protected

Compute load component of tempered transfer criterion.

Parameters
before_w_src in original work on source rank
before_w_dst in original work on destination rank
after_w_src in new work on source rank
after_w_dst in new work on destination rank

double vt::vrt::collection::lb::TemperedLB::computeWork(double load, double inter_comm_bytes, double intra_comm_bytes, double shared_comm_bytes) const protected

Compute the amount of work based on the work model.

Parameters
load in the load for a rank
inter_comm_bytes
intra_comm_bytes
shared_comm_bytes
Returns the amount of work

WorkBreakdown vt::vrt::collection::lb::TemperedLB::computeWorkBreakdown(NodeType node, std::unordered_map<ObjIDType, LoadType> const& objs, std::set<ObjIDType> const& exclude = {}, std::unordered_map<ObjIDType, LoadType> const& include = {}) protected

Compute work based on a set of objects.

Parameters
node in the node these objects are mapped to
objs in input set of objects
exclude in a set of objects to exclude that are in objs
include in a map of objects to include that are not in objs
Returns the current amount of work for the set of objects

void vt::vrt::collection::lb::TemperedLB::considerSwapsAfterLock(MsgSharedPtr<LockedInfoMsg> msg) protected

Consider possible swaps with all the up-to-date info from a rank.

Parameters
msg in update message with all the info

void vt::vrt::collection::lb::TemperedLB::giveCluster(NodeType from_rank, std::unordered_map<SharedIDType, BytesType> const& give_shared_blocks_size, std::unordered_map<ObjIDType, LoadType> const& give_objs, std::unordered_map<ObjIDType, SharedIDType> const& give_obj_shared_block, std::unordered_map<ObjIDType, BytesType> const& give_obj_working_bytes, SharedIDType take_cluster) protected

Give a cluster to a rank.

Parameters
from_rank in the rank it's coming from
give_shared_blocks_size in the shared block info for the swap
give_objs in the objects given
give_obj_shared_block in the shared block the objs are part of
give_obj_working_bytes in the working bytes for the objs
take_cluster in (optional) a cluster requested in return

auto vt::vrt::collection::lb::TemperedLB::removeClusterToSend(SharedIDType shared_id, std::set<ObjIDType> objs = {}) protected

Remove a cluster to send. Does all the bookkeeping associated with removing the cluster.

Parameters
shared_id in the shared ID of the cluster to remove
objs in the set of objects to send with that shared ID (optional, if not specified then send all of them)
Returns a tuple with all the information to send to giveCluster