4 changes: 2 additions & 2 deletions cmake/config.cmake
@@ -46,7 +46,7 @@
# - ON: enable CUDA with cmake's auto search
# - OFF: disable CUDA
# - /path/to/cuda: use specific path to cuda toolkit
-set(USE_CUDA OFF)
+set(USE_CUDA ON)

# Whether to enable NCCL support:
# - ON: enable NCCL with cmake's auto search
@@ -158,7 +158,7 @@ set(USE_PROFILER ON)
# - OFF: disable llvm, note this will disable CPU codegen
# which is needed for most cases
# - /path/to/llvm-config: enable specific LLVM when multiple llvm-dev is available.
-set(USE_LLVM "/projs/AE/dongshouyang/clang+llvm-11.0.0-x86_64-linux-gnu-ubuntu-20.04/bin/llvm-config")
+set(USE_LLVM ON)

# Whether use MLIR to help analyze, requires USE_LLVM is enabled
# Possible values: ON/OFF
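With USE_CUDA and USE_LLVM switched to ON (CMake auto-search), the rebuilt library should report both flags. A quick sanity check from Python, using the stock tvm.support.libinfo() API (independent of this patch):

import tvm

# libinfo() exposes the compile-time flags the running libtvm was built with.
info = tvm.support.libinfo()
print("USE_CUDA:", info.get("USE_CUDA"))
print("USE_LLVM:", info.get("USE_LLVM"))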
5 changes: 4 additions & 1 deletion include/tvm/meta_schedule/task_scheduler.h
@@ -167,6 +167,7 @@ class TaskSchedulerNode : public runtime::Object {
* \brief Jointly tune a given list of tasks.
* \param tasks The tasks to be tuned
* \param task_weights The weight of each task
+* \param tuning_time The maximum wall-clock tuning time, in seconds
* \param max_trials_global The maximum number of trials to be performed globally
* \param max_trials_per_task The maximum number of trials to be performed for each task
* \param num_trials_per_iter The number of trials to be performed in each iteration
@@ -178,6 +179,7 @@
*/
virtual void Tune(Array<TuneContext> tasks, //
Array<FloatImm> task_weights, //
+int tuning_time, //
int max_trials_global, //
int max_trials_per_task, //
int num_trials_per_iter, //
@@ -221,6 +223,7 @@ class PyTaskSchedulerNode : public TaskSchedulerNode {
/*! \brief The function type of `Tune` method. */
using FTune = runtime::TypedPackedFunc<void(Array<TuneContext> tasks, //
Array<FloatImm> task_weights, //
+int tuning_time, //
int max_trials_global, //
int max_trials_per_task, //
int num_trials_per_iter, //
@@ -246,7 +249,7 @@ class PyTaskSchedulerNode : public TaskSchedulerNode {

int NextTaskId() final;
Array<RunnerResult> JoinRunningTask(int task_id) final;
-void Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, int max_trials_global,
+void Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, int tuning_time, int max_trials_global,
int max_trials_per_task, int num_trials_per_iter, Builder builder, Runner runner,
Array<MeasureCallback> measure_callbacks, Optional<Database> database,
Optional<CostModel> cost_model) final;
4 changes: 4 additions & 0 deletions python/tvm/meta_schedule/task_scheduler/task_scheduler.py
@@ -94,6 +94,7 @@ def tune(
self,
tasks: List[TuneContext],
task_weights: List[float],
+tuning_time: int,
max_trials_global: int,
max_trials_per_task: int,
num_trials_per_iter: int,
@@ -133,6 +134,7 @@ def tune(
self,
tasks,
task_weights,
+tuning_time,
max_trials_global,
max_trials_per_task,
num_trials_per_iter,
@@ -236,6 +238,7 @@ def tune(
self,
tasks: List[TuneContext],
task_weights: List[float],
+tuning_time: int,
max_trials_global: int,
max_trials_per_task: int,
builder: Builder,
@@ -250,6 +253,7 @@
self._outer(), # type: ignore # pylint: disable=no-member
tasks,
task_weights,
+tuning_time,
max_trials_global,
max_trials_per_task,
builder,
4 changes: 4 additions & 0 deletions python/tvm/meta_schedule/tir_integration.py
@@ -46,6 +46,7 @@ def tune_tir( # pylint: disable=too-many-locals
work_dir: str,
max_trials_global: int,
*,
+tuning_time: int = 1000000,
max_trials_per_task: Optional[int] = None,
num_trials_per_iter: int = 64,
builder: Builder.BuilderType = "local",
@@ -146,6 +147,7 @@ def tune_tir( # pylint: disable=too-many-locals
tasks=tasks,
task_weights=[1.0] * len(tasks),
work_dir=work_dir,
+tuning_time=tuning_time,
max_trials_global=max_trials_global,
max_trials_per_task=max_trials_per_task,
num_trials_per_iter=num_trials_per_iter,
@@ -166,6 +168,7 @@ def _tune_tir(
work_dir: str,
max_trials_global: int,
*,
+tuning_time: int = 1000000,
num_trials_per_iter: int = 64,
builder: Builder.BuilderType = "local",
runner: Runner.RunnerType = "local",
@@ -225,6 +228,7 @@ def _tune_tir(
target,
work_dir,
max_trials_global,
+tuning_time=tuning_time,
num_trials_per_iter=num_trials_per_iter,
builder=builder,
runner=runner,
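With the plumbing above, tune_tir exposes the time budget as a keyword argument; the default of 1000000 seconds (about 11.6 days) effectively leaves it unlimited. A minimal usage sketch, assuming this patch is applied and that MyModule is an IRModule defined elsewhere (e.g. in TVMScript):

from tvm import meta_schedule as ms

database = ms.tune_tir(
    mod=MyModule,             # assumed: an existing tvm.IRModule to tune
    target="llvm -num-cores 4",
    work_dir="./tune_tmp",
    max_trials_global=128,    # the trial budget still applies
    tuning_time=3600,         # new: stop after at most one hour of wall-clock time
)

Whichever budget runs out first ends tuning; the returned database holds the records measured up to that point.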
2 changes: 2 additions & 0 deletions python/tvm/meta_schedule/tune.py
@@ -32,6 +32,7 @@ def tune_tasks(
task_weights: List[float],
work_dir: str,
max_trials_global: int,
+tuning_time: int = 1000000,
max_trials_per_task: Optional[int] = None,
num_trials_per_iter: int = 64,
builder: Builder.BuilderType = "local",
@@ -118,6 +119,7 @@ def tune_tasks(
task_scheduler.tune(
tasks=tasks,
task_weights=task_weights,
+tuning_time=tuning_time,
max_trials_global=max_trials_global,
max_trials_per_task=max_trials_per_task,
num_trials_per_iter=num_trials_per_iter,
4 changes: 2 additions & 2 deletions src/meta_schedule/task_scheduler/gradient_based.cc
@@ -44,14 +44,14 @@ class GradientBasedNode final : public TaskSchedulerNode {
TVM_DECLARE_FINAL_OBJECT_INFO(GradientBasedNode, TaskSchedulerNode);

public:
-void Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, int max_trials_global,
+void Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, int tuning_time, int max_trials_global,
int max_trials_per_task, int num_trials_per_iter, Builder builder, Runner runner,
Array<MeasureCallback> measure_callbacks, Optional<Database> database,
Optional<CostModel> cost_model) final {
int n_tasks = tasks.size();
round_robin_rounds_ = 0;
best_latency_history_.resize(n_tasks, std::vector<double>());
-TaskSchedulerNode::Tune(tasks, task_weights, max_trials_global, max_trials_per_task,
+TaskSchedulerNode::Tune(tasks, task_weights, tuning_time, max_trials_global, max_trials_per_task,
num_trials_per_iter, builder, runner, measure_callbacks, database,
cost_model);
}
21 changes: 15 additions & 6 deletions src/meta_schedule/task_scheduler/task_scheduler.cc
@@ -17,7 +17,8 @@
* under the License.
*/
#include "../utils.h"

+#include <chrono>
namespace tvm {
namespace meta_schedule {

@@ -141,7 +142,7 @@ void TaskCleanUp(TaskRecordNode* self, int task_id, const Array<RunnerResult>& r
}

void TaskSchedulerNode::Tune(Array<TuneContext> ctxs, Array<FloatImm> task_weights,
-int max_trials_global, int max_trials_per_task,
+int tuning_time, int max_trials_global, int max_trials_per_task,
int num_trials_per_iter, Builder builder, Runner runner,
Array<MeasureCallback> measure_callbacks, Optional<Database> database,
Optional<CostModel> cost_model) {
@@ -176,7 +177,15 @@ void TaskSchedulerNode::Tune(Array<TuneContext> ctxs, Array<FloatImm> task_weigh
}

int num_trials_already = 0;
-for (int task_id; num_trials_already < max_trials_global && (task_id = NextTaskId()) != -1;) {
+// Record the wall-clock start time so the loop below can enforce the time budget.
+double start_time = std::chrono::duration_cast<std::chrono::seconds>(
+    std::chrono::system_clock::now().time_since_epoch()).count();
+// Stop when the global trial budget is exhausted, when no runnable task remains,
+// or when more than `tuning_time` seconds have elapsed since tuning began.
+for (int task_id;
+     num_trials_already < max_trials_global &&
+     (task_id = NextTaskId()) != -1 &&
+     (std::chrono::duration_cast<std::chrono::seconds>(
+          std::chrono::system_clock::now().time_since_epoch()).count() - start_time) < tuning_time;
+     ) {
TVM_PY_LOG(INFO, this->logger)
<< "TaskScheduler picks Task #" << task_id << ": " << tasks_[task_id]->ctx->task_name;
TaskRecordNode* task = tasks_[task_id].get();
@@ -346,17 +355,17 @@
}
}

-void PyTaskSchedulerNode::Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights,
+void PyTaskSchedulerNode::Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, int tuning_time,
int max_trials_global, int max_trials_per_task,
int num_trials_per_iter, Builder builder, Runner runner,
Array<MeasureCallback> measure_callbacks,
Optional<Database> database, Optional<CostModel> cost_model) {
if (f_tune == nullptr) {
-TaskSchedulerNode::Tune(tasks, task_weights, max_trials_global, max_trials_per_task,
+TaskSchedulerNode::Tune(tasks, task_weights, tuning_time, max_trials_global, max_trials_per_task,
num_trials_per_iter, builder, runner, measure_callbacks, database,
cost_model);
} else {
-f_tune(tasks, task_weights, max_trials_global, max_trials_per_task, num_trials_per_iter,
+f_tune(tasks, task_weights, tuning_time, max_trials_global, max_trials_per_task, num_trials_per_iter,
builder, runner, measure_callbacks, database, cost_model);
}
}
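The stopping rule added to TaskSchedulerNode::Tune, restated as a Python sketch for illustration (not TVM API; next_task_id and run_iteration are hypothetical stand-ins for NextTaskId() and one round of the loop body). The sketch uses time.monotonic(), which cannot jump backwards; the C++ code reads the system clock, so a steady clock would be a natural hardening:

import time

def tune_loop(scheduler, max_trials_global: int, tuning_time: int) -> None:
    start = time.monotonic()
    trials = 0
    while trials < max_trials_global and time.monotonic() - start < tuning_time:
        task_id = scheduler.next_task_id()   # hypothetical; mirrors NextTaskId()
        if task_id == -1:                    # -1 signals that every task is finished
            break
        trials += scheduler.run_iteration(task_id)  # hypothetical; returns trials used

As in the C++ loop, the budget is only checked between iterations, so an in-flight iteration may run past tuning_time before the loop exits.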