Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions xprof/convert/smart_suggestion/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,21 @@ cc_library(
],
)

# Header-only library (hdrs only, no srcs) providing the infeed smart
# suggestion rule declared in infeed_rule.h.
cc_library(
name = "infeed_rule",
hdrs = ["infeed_rule.h"],
deps = [
":constants",
":signal_provider",
":smart_suggestion_rule",
"@com_google_absl//absl/container:flat_hash_map",
"@com_google_absl//absl/status:statusor",
"@com_google_absl//absl/strings",
"@com_google_absl//absl/strings:str_format",
"@org_xprof//plugin/xprof/protobuf:smart_suggestion_proto_cc",
],
)

cc_library(
name = "data_transfer_bound_rule",
hdrs = ["data_transfer_bound_rule.h"],
Expand Down Expand Up @@ -262,6 +277,7 @@ cc_library(
":data_transfer_bound_rule",
":debug_print_rule",
":host_processing_bound_rule",
":infeed_rule",
":memory_bound_rule",
":smart_suggestion_rule_factory",
":sparse_core_offload_rule",
Expand Down
3 changes: 3 additions & 0 deletions xprof/convert/smart_suggestion/all_rules.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ limitations under the License.
#include "xprof/convert/smart_suggestion/data_transfer_bound_rule.h"
#include "xprof/convert/smart_suggestion/debug_print_rule.h"
#include "xprof/convert/smart_suggestion/host_processing_bound_rule.h"
#include "xprof/convert/smart_suggestion/infeed_rule.h"
#include "xprof/convert/smart_suggestion/memory_bound_rule.h"
#include "xprof/convert/smart_suggestion/smart_suggestion_rule_factory.h"
#include "xprof/convert/smart_suggestion/sparse_core_offload_rule.h"
Expand All @@ -41,6 +42,7 @@ inline void RegisterAllRules(SmartSuggestionRuleFactory* f) {
f->Register<DataTransferBoundRule>();
f->Register<DebugPrintRule>();
f->Register<HostProcessingBoundRule>();
f->Register<InfeedRule>();
f->Register<MemoryBoundRule>();
f->Register<SparseCoreOffloadRule>();
f->Register<TensorCoreIdleBoundRule>();
Expand All @@ -51,6 +53,7 @@ inline void RegisterAllRules(SmartSuggestionRuleFactory* f) {
// Registers the rules enabled for 3P with the factory `f`; currently
// BarrierCoresRule and InfeedRule.
// NOTE(review): "3P" presumably refers to third-party (external) users --
// confirm. Entries between the keep-sorted markers are kept in sorted order.
inline void RegisterAllRulesFor3P(SmartSuggestionRuleFactory* f) {
// go/keep-sorted start
f->Register<BarrierCoresRule>();
f->Register<InfeedRule>();
// TODO Enable SparseCoreOffloadRule for 3P.
// go/keep-sorted end
}
Expand Down
4 changes: 4 additions & 0 deletions xprof/convert/smart_suggestion/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ inline constexpr double kSpecialOpBoundThresholdInPercent = 10;
// than this threshold, it is considered a bottleneck.
inline constexpr double kDebugPrintBoundThresholdInPercent = 5;

// Infeed-bound threshold, expressed in percent of step time. If the percentage
// of step time that is due to the infeed op is higher than this threshold, it
// is considered a bottleneck.
inline constexpr double kInfeedBoundThresholdInPercent = 10;

// Async-done threshold, expressed in percent. If the percentage of async-done
// time is higher than this threshold, it is considered a bottleneck.
inline constexpr double kAsyncDoneThresholdInPercent = 10;
Expand Down
123 changes: 123 additions & 0 deletions xprof/convert/smart_suggestion/infeed_rule.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/* Copyright 2025 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_INFEED_RULE_H_
#define THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_INFEED_RULE_H_

#include <algorithm>
#include <optional>
#include <string>
#include <vector>

#include "absl/container/flat_hash_map.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_format.h"
#include "absl/strings/str_join.h"
#include "xprof/convert/smart_suggestion/constants.h"
#include "xprof/convert/smart_suggestion/signal_provider.h"
#include "xprof/convert/smart_suggestion/smart_suggestion_rule.h"
#include "plugin/xprof/protobuf/smart_suggestion.pb.h"

namespace tensorflow {
namespace profiler {

// Event name passed to SignalProvider::GetPerHostAvgEventTimePercent to query
// infeed time. Declared `inline` so that every translation unit including this
// header shares a single definition (a plain namespace-scope `constexpr` array
// has internal linkage, giving each TU its own copy that the inline member
// functions below would then refer to).
inline constexpr char kInfeedOpName[] = "infeed";

// Rule to detect infeed percentage bottleneck.
class InfeedRule : public SmartSuggestionRule {
public:
bool MeetsConditions(const SignalProvider& signal_provider) const override {
auto host_stats =
signal_provider.GetPerHostAvgEventTimePercent(kInfeedOpName);
if (!host_stats.ok() || host_stats->empty()) {
return false;
}
for (const auto& host_stat : *host_stats) {
if (host_stat.second >= kDebugPrintBoundThresholdInPercent) {
return true;
}
}
return false;
}

// Generates suggestions if the infeed percentage is above the threshold.
absl::StatusOr<std::optional<SmartSuggestion>> GenerateSuggestion(
const SignalProvider& signal_provider) const override {
SmartSuggestion suggestion;
suggestion.set_rule_name("InfeedRule");
// If MeetsConditions passed, GetPerHostAvgEventTimePercent is ok and has
// hosts with fractions >= kDebugPrintBoundThresholdInPercent.
absl::flat_hash_map<std::string, double> high_infeed_hosts;
double max_percent = 0.0;
auto host_stats =
signal_provider.GetPerHostAvgEventTimePercent(kInfeedOpName);
if (host_stats.ok()) {
for (const auto& host_stat : *host_stats) {
if (host_stat.second >= kDebugPrintBoundThresholdInPercent) {
high_infeed_hosts.insert(host_stat);
if (host_stat.second > max_percent) {
max_percent = host_stat.second;
}
}
}
}
std::string infeed_hosts_list_html;
std::string infeed_suggestion;
if (high_infeed_hosts.size() > 5) {
infeed_hosts_list_html = absl::StrCat(
" <b>", high_infeed_hosts.size(),
" hosts</b> have an average infeed time fraction above <b>",
absl::StrFormat("%.1f", kDebugPrintBoundThresholdInPercent),
"%</b>.");
infeed_suggestion =
absl::StrCat("<li><b>Investigate Hosts with High Infeed Time:",
infeed_hosts_list_html, "</li>");
} else {
std::vector<std::string> host_entries;
for (const auto& [hostname, avg_percent] : high_infeed_hosts) {
host_entries.push_back(absl::StrCat(
"Host <b>", hostname, "</b> ",
"average infeed time fraction: <b>",
absl::StrFormat("%.1f", avg_percent), "%</b>"));
}
std::sort(host_entries.begin(), host_entries.end());
infeed_hosts_list_html = absl::StrJoin(host_entries, ", ");
infeed_suggestion = absl::StrCat(
"<li><b>Investigate Hosts with High Infeed Time"
":</b> The following hosts have high infeed time fraction: ",
infeed_hosts_list_html, "</li>");
}

auto display_name = absl::StrCat(kInfeedOpName);
std::string suggestion_text = absl::StrCat(
"<p>Your program is likely bottlenecked by <b>", display_name,
"</b> operations: <b> up to ",
absl::StrFormat("%.1f", max_percent),
"% of step time</b> is spent on these operations on some hosts. "
"Please consider the following "
"optimizations:</p>",
"<ul>", infeed_suggestion);
absl::StrAppend(&suggestion_text, "</ul>");

suggestion.set_suggestion_text(suggestion_text);
return suggestion;
}
};

} // namespace profiler
} // namespace tensorflow

#endif // THIRD_PARTY_XPROF_CONVERT_SMART_SUGGESTION_INFEED_RULE_H_