diff --git a/htsim/sim/atlahs_htsim_api.cpp b/htsim/sim/atlahs_htsim_api.cpp index 0da4788..33f924a 100644 --- a/htsim/sim/atlahs_htsim_api.cpp +++ b/htsim/sim/atlahs_htsim_api.cpp @@ -31,10 +31,8 @@ void AtlahsHtsimApi::Send(const SendEvent &event, graph_node_properties elem) { - if (getNumberNic() > 1) { - from = getHtsimNodeNumber(from, elem.nic); - to = getHtsimNodeNumber(to, elem.nic); - } + from = getHtsimNodeNumber(from, elem.nic); + to = getHtsimNodeNumber(to, elem.nic); simtime_picosec flow_duration = size * 8 / 200 * 1000; @@ -160,4 +158,4 @@ void AtlahsHtsimApi::EventFinished(const EventOver &event) { } else { abort(); } -} \ No newline at end of file +} diff --git a/htsim/sim/atlahs_htsim_api.h b/htsim/sim/atlahs_htsim_api.h index 2d5b4f7..b0c8f17 100644 --- a/htsim/sim/atlahs_htsim_api.h +++ b/htsim/sim/atlahs_htsim_api.h @@ -49,6 +49,11 @@ class FlowInfo { class AtlahsHtsimApi : public AtlahsApi { public: + enum class GoalRankMapping { + GpuRank, + UniqueNic, + }; + AtlahsHtsimApi() = default; virtual ~AtlahsHtsimApi() = default; @@ -121,6 +126,37 @@ class AtlahsHtsimApi : public AtlahsApi { void setNumberNic(int nic) { number_nics = nic; } int getNumberNic() const { return number_nics; } + void setGoalRankMapping(GoalRankMapping mapping) { goal_rank_mapping = mapping; } + GoalRankMapping getGoalRankMapping() const { return goal_rank_mapping; } + + void setGoalRankMappingFromBinaryHeader(uint32_t rank_count, int cpu_count, int nic_count) { + setNumberNic(nic_count); + + // The binary GOAL format does not store the generator version. Infer + // the layout from the parsed schedule header instead: V1 unique-nic + // files keep ranks at node granularity and use several NICs per rank, + // while V2 files keep ranks at GPU/HTSIM-node granularity. + const bool looks_like_v2_gpu_rank = + nic_count == 1 || + (nic_count == 2 && rank_count > static_cast(nic_count) && cpu_count <= 8); + goal_rank_mapping = + looks_like_v2_gpu_rank ? GoalRankMapping::GpuRank : GoalRankMapping::UniqueNic; + } + + bool usesUniqueNicRankMapping() const { + return goal_rank_mapping == GoalRankMapping::UniqueNic; + } + + const char* getGoalRankMappingName() const { + switch (goal_rank_mapping) { + case GoalRankMapping::UniqueNic: + return "unique-nic"; + case GoalRankMapping::GpuRank: + return "gpu-rank"; + } + return "unknown"; + } + void setNumberNacks(int nacks) { number_of_nacks += nacks; } uint64_t getNumberNacks() const { return number_of_nacks; } @@ -128,7 +164,7 @@ class AtlahsHtsimApi : public AtlahsApi { simtime_picosec getGlobalTimeNs() const { return _eventlist->now() / 1000; } int getHtsimNodeNumber(int lgs_host, int lgs_nic) { - return lgs_host * number_nics + lgs_nic; + return usesUniqueNicRankMapping() ? lgs_host * number_nics + lgs_nic : lgs_host; } linkspeed_bps linkspeed; // TO DO @@ -158,6 +194,7 @@ class AtlahsHtsimApi : public AtlahsApi { // LGS Specific int number_nics = 1; + GoalRankMapping goal_rank_mapping = GoalRankMapping::GpuRank; // EQDS Specific vector pacersEQDS; @@ -179,4 +216,4 @@ class AtlahsHtsimApi : public AtlahsApi { std::function()> mp_factory = nullptr; }; -#endif // ATLAHS_HTSIM_API_H \ No newline at end of file +#endif // ATLAHS_HTSIM_API_H diff --git a/htsim/sim/logsim-interface.cpp b/htsim/sim/logsim-interface.cpp index 81550ad..59de43f 100644 --- a/htsim/sim/logsim-interface.cpp +++ b/htsim/sim/logsim-interface.cpp @@ -392,6 +392,14 @@ int start_lgs(std::string filename_goal, LogSimInterface &lgs) { const uint p = parser.schedules.size(); const int ncpus = parser.GetNumCPU(); const int nnics = parser.GetNumNIC(); + lgs_interface->htsim_api->setGoalRankMappingFromBinaryHeader( + static_cast(p), ncpus, nnics); + printf("[ATLAHS] GOAL rank mapping: %s (LGS ranks=%u, CPUs=%d, NICs=%d, HTSIM nodes=%u)\n", + lgs_interface->htsim_api->getGoalRankMappingName(), + p, + ncpus, + nnics, + lgs_interface->htsim_api->usesUniqueNicRankMapping() ? p * nnics : p); bool comm_dep_file_arg = false; @@ -509,7 +517,6 @@ int start_lgs(std::string filename_goal, LogSimInterface &lgs) { bool comm_dep_file_given = false; bool qstat_arg = false; bool batchmode_given = false; - lgs_interface->htsim_api->setNumberNic(nnics); bool progress_given = true;