diff --git a/cmake/config.cmake b/cmake/config.cmake index ec72fc383e79..29d3d3d2b34b 100644 --- a/cmake/config.cmake +++ b/cmake/config.cmake @@ -46,7 +46,7 @@ # - ON: enable CUDA with cmake's auto search # - OFF: disable CUDA # - /path/to/cuda: use specific path to cuda toolkit -set(USE_CUDA OFF) +set(USE_CUDA ON) # Whether to enable NCCL support: # - ON: enable NCCL with cmake's auto search @@ -158,7 +158,7 @@ set(USE_PROFILER ON) # - OFF: disable llvm, note this will disable CPU codegen # which is needed for most cases # - /path/to/llvm-config: enable specific LLVM when multiple llvm-dev is available. -set(USE_LLVM "/projs/AE/dongshouyang/clang+llvm-11.0.0-x86_64-linux-gnu-ubuntu-20.04/bin/llvm-config") +set(USE_LLVM ON) # Whether use MLIR to help analyze, requires USE_LLVM is enabled # Possible values: ON/OFF diff --git a/include/tvm/meta_schedule/task_scheduler.h b/include/tvm/meta_schedule/task_scheduler.h index f4fc491286dd..a2794126d941 100644 --- a/include/tvm/meta_schedule/task_scheduler.h +++ b/include/tvm/meta_schedule/task_scheduler.h @@ -167,6 +167,7 @@ class TaskSchedulerNode : public runtime::Object { * \brief Jointly tune a given list of tasks. * \param tasks The tasks to be tuned * \param task_weights The weight of each task + * \param tuning_time The maximum wall-clock time allowed for tuning * \param max_trials_global The maximum number of trials to be performed globally * \param max_trials_per_task The maximum number of trials to be performed for each task * \param num_trials_per_iter The number of trials to be performed in each iteration @@ -178,6 +179,7 @@ */ virtual void Tune(Array<TuneContext> tasks, // Array<FloatImm> task_weights, // + int tuning_time, // int max_trials_global, // int max_trials_per_task, // int num_trials_per_iter, // @@ -221,6 +223,7 @@ class PyTaskSchedulerNode : public TaskSchedulerNode { /*! \brief The function type of `Tune` method. 
*/ using FTune = runtime::TypedPackedFunc<void(Array<TuneContext> tasks, // Array<FloatImm> task_weights, // + int tuning_time, // int max_trials_global, // int max_trials_per_task, // int num_trials_per_iter, // @@ -246,7 +249,7 @@ class PyTaskSchedulerNode : public TaskSchedulerNode { int NextTaskId() final; Array<RunnerResult> JoinRunningTask(int task_id) final; - void Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, int max_trials_global, + void Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, int tuning_time, int max_trials_global, int max_trials_per_task, int num_trials_per_iter, Builder builder, Runner runner, Array<MeasureCallback> measure_callbacks, Optional<Database> database, Optional<CostModel> cost_model) final; diff --git a/python/tvm/meta_schedule/task_scheduler/task_scheduler.py b/python/tvm/meta_schedule/task_scheduler/task_scheduler.py index d56d944474e9..2412fada6f8c 100644 --- a/python/tvm/meta_schedule/task_scheduler/task_scheduler.py +++ b/python/tvm/meta_schedule/task_scheduler/task_scheduler.py @@ -94,6 +94,7 @@ def tune( self, tasks: List[TuneContext], task_weights: List[float], + tuning_time: int, max_trials_global: int, max_trials_per_task: int, num_trials_per_iter: int, @@ -133,6 +134,7 @@ def tune( self, tasks, task_weights, + tuning_time, max_trials_global, max_trials_per_task, num_trials_per_iter, @@ -236,6 +238,7 @@ def tune( self, tasks: List[TuneContext], task_weights: List[float], + tuning_time: int, max_trials_global: int, max_trials_per_task: int, builder: Builder, @@ -250,6 +253,7 @@ def tune( self._outer(), # type: ignore # pylint: disable=no-member tasks, task_weights, + tuning_time, max_trials_global, max_trials_per_task, builder, diff --git a/python/tvm/meta_schedule/tir_integration.py b/python/tvm/meta_schedule/tir_integration.py index 201cc804d6c8..135ce64f2f93 100644 --- a/python/tvm/meta_schedule/tir_integration.py +++ b/python/tvm/meta_schedule/tir_integration.py @@ -46,6 +46,7 @@ def tune_tir( # pylint: disable=too-many-locals work_dir: str, max_trials_global: int, *, + tuning_time: int = 1000000, max_trials_per_task: 
Optional[int] = None, num_trials_per_iter: int = 64, builder: Builder.BuilderType = "local", @@ -146,6 +147,7 @@ def tune_tir( # pylint: disable=too-many-locals tasks=tasks, task_weights=[1.0] * len(tasks), work_dir=work_dir, + tuning_time=tuning_time, max_trials_global=max_trials_global, max_trials_per_task=max_trials_per_task, num_trials_per_iter=num_trials_per_iter, @@ -166,6 +168,7 @@ def _tune_tir( work_dir: str, max_trials_global: int, *, + tuning_time: int = 1000000, num_trials_per_iter: int = 64, builder: Builder.BuilderType = "local", runner: Runner.RunnerType = "local", @@ -225,6 +228,7 @@ def _tune_tir( target, work_dir, max_trials_global, + tuning_time=tuning_time, num_trials_per_iter=num_trials_per_iter, builder=builder, runner=runner, diff --git a/python/tvm/meta_schedule/tune.py b/python/tvm/meta_schedule/tune.py index 887941ada0d2..c282b3d681c8 100644 --- a/python/tvm/meta_schedule/tune.py +++ b/python/tvm/meta_schedule/tune.py @@ -32,6 +32,7 @@ def tune_tasks( task_weights: List[float], work_dir: str, max_trials_global: int, + tuning_time: int = 1000000, max_trials_per_task: Optional[int] = None, num_trials_per_iter: int = 64, builder: Builder.BuilderType = "local", @@ -118,6 +119,7 @@ def tune_tasks( task_scheduler.tune( tasks=tasks, task_weights=task_weights, + tuning_time=tuning_time, max_trials_global=max_trials_global, max_trials_per_task=max_trials_per_task, num_trials_per_iter=num_trials_per_iter, diff --git a/src/meta_schedule/task_scheduler/gradient_based.cc b/src/meta_schedule/task_scheduler/gradient_based.cc index 5b261eec32a4..aedb055e2cff 100644 --- a/src/meta_schedule/task_scheduler/gradient_based.cc +++ b/src/meta_schedule/task_scheduler/gradient_based.cc @@ -44,14 +44,14 @@ class GradientBasedNode final : public TaskSchedulerNode { TVM_DECLARE_FINAL_OBJECT_INFO(GradientBasedNode, TaskSchedulerNode); public: - void Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, int max_trials_global, + void Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, int 
tuning_time, int max_trials_global, int max_trials_per_task, int num_trials_per_iter, Builder builder, Runner runner, Array<MeasureCallback> measure_callbacks, Optional<Database> database, Optional<CostModel> cost_model) final { int n_tasks = tasks.size(); round_robin_rounds_ = 0; best_latency_history_.resize(n_tasks, std::vector<double>()); - TaskSchedulerNode::Tune(tasks, task_weights, max_trials_global, max_trials_per_task, + TaskSchedulerNode::Tune(tasks, task_weights, tuning_time, max_trials_global, max_trials_per_task, num_trials_per_iter, builder, runner, measure_callbacks, database, cost_model); } diff --git a/src/meta_schedule/task_scheduler/task_scheduler.cc b/src/meta_schedule/task_scheduler/task_scheduler.cc index 404ee01983c5..0f4697ddb166 100644 --- a/src/meta_schedule/task_scheduler/task_scheduler.cc +++ b/src/meta_schedule/task_scheduler/task_scheduler.cc @@ -17,7 +17,8 @@ * under the License. */ #include "../utils.h" - +#include <chrono> +#include <ctime> namespace tvm { namespace meta_schedule { @@ -141,7 +142,7 @@ void TaskCleanUp(TaskRecordNode* self, int task_id, const Array<RunnerResult>& r } void TaskSchedulerNode::Tune(Array<TuneContext> ctxs, Array<FloatImm> task_weights, - int max_trials_global, int max_trials_per_task, + int tuning_time, int max_trials_global, int max_trials_per_task, int num_trials_per_iter, Builder builder, Runner runner, Array<MeasureCallback> measure_callbacks, Optional<Database> database, Optional<CostModel> cost_model) { @@ -176,7 +177,15 @@ void TaskSchedulerNode::Tune(Array<TuneContext> ctxs, Array<FloatImm> task_weigh } int num_trials_already = 0; - for (int task_id; num_trials_already < max_trials_global && (task_id = NextTaskId()) != -1;) { + double start_time_ = std::chrono::duration_cast<std::chrono::seconds>( + std::chrono::system_clock::now().time_since_epoch() + ).count(); + //for (int task_id; num_trials_already < max_trials_global && (task_id = NextTaskId()) != -1;) { + for (int task_id; + num_trials_already < max_trials_global && + (task_id = NextTaskId()) != -1 && + (std::chrono::duration_cast<std::chrono::seconds>(std::chrono::system_clock::now().time_since_epoch()).count() - start_time_) < 
tuning_time; + ) { TVM_PY_LOG(INFO, this->logger) << "TaskScheduler picks Task #" << task_id << ": " << tasks_[task_id]->ctx->task_name; TaskRecordNode* task = tasks_[task_id].get(); @@ -346,17 +355,17 @@ Array<RunnerResult> PyTaskSchedulerNode::JoinRunningTask(int task_id) { } } -void PyTaskSchedulerNode::Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, +void PyTaskSchedulerNode::Tune(Array<TuneContext> tasks, Array<FloatImm> task_weights, int tuning_time, int max_trials_global, int max_trials_per_task, int num_trials_per_iter, Builder builder, Runner runner, Array<MeasureCallback> measure_callbacks, Optional<Database> database, Optional<CostModel> cost_model) { if (f_tune == nullptr) { - TaskSchedulerNode::Tune(tasks, task_weights, max_trials_global, max_trials_per_task, + TaskSchedulerNode::Tune(tasks, task_weights, tuning_time, max_trials_global, max_trials_per_task, num_trials_per_iter, builder, runner, measure_callbacks, database, cost_model); } else { - f_tune(tasks, task_weights, tuning_time, max_trials_global, max_trials_per_task, num_trials_per_iter, builder, runner, measure_callbacks, database, cost_model); } }