forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDeviceAccelerator.h
More file actions
113 lines (91 loc) · 4.21 KB
/
DeviceAccelerator.h
File metadata and controls
113 lines (91 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#pragma once
#include <c10/core/CachingDeviceAllocator.h>
#include <c10/core/DeviceCapability.h>
#include <c10/core/DeviceType.h>
#include <c10/macros/Macros.h>
#include <ATen/accelerator/Graph.h>
#include <optional>
namespace at::accelerator {
// Note [Accelerator Concept]
// This file defines the top level Accelerator concept for PyTorch.
// A device is an accelerator per the definition here if:
// - It is mutually exclusive with all other accelerators
// - It performs asynchronous compute via a Stream/Event system
// - It provides a set of common APIs as defined by AcceleratorHooksInterface
//
// As of today, accelerator devices are (in no particular order):
// CUDA, MTIA, XPU, HIP, MPS, PrivateUse1
// Ensures that only one accelerator is available (at compile time if
// possible) and returns its device type.
// `checked` defaults to false. When `checked` is true, the returned optional
// always has a value.
// NOTE(review): with `checked == false` the result is presumably std::nullopt
// when no accelerator is available, and `checked == true` presumably raises
// instead — confirm against the definition, which is not in this header.
TORCH_API std::optional<c10::DeviceType> getAccelerator(bool checked = false);
// Returns true if the given device type is an accelerator in the sense of
// Note [Accelerator Concept], false otherwise.
TORCH_API bool isAccelerator(c10::DeviceType device_type);
// Returns true only when `device_type` differs from every excluded device
// type (`first_excluded` plus each entry of `rest_excluded`) and
// isAccelerator(device_type) holds. At least one excluded type must be given,
// and the template only participates in overload resolution when every
// additional argument is exactly a c10::DeviceType.
template <
    typename... T,
    typename = std::enable_if_t<(std::is_same_v<T, c10::DeviceType> && ...)>>
inline bool isAcceleratorExcluded(
    c10::DeviceType device_type,
    c10::DeviceType first_excluded,
    T... rest_excluded) {
  // Left fold over the exclusion list: the comparisons run in argument order
  // and stop at the first match.
  const bool is_excluded =
      ((device_type == first_excluded) || ... ||
       (device_type == rest_excluded));
  // isAccelerator() is consulted only once no exclusion matched.
  return !is_excluded && isAccelerator(device_type);
}
// Return the number of available devices. Note that this is *REQUIRED* to
// not raise any exception.
// NOTE(review): the declaration is not marked noexcept, so this requirement
// is a contract on the implementation rather than something the compiler
// enforces.
TORCH_API c10::DeviceIndex deviceCount();
// Make `device_index` the current device index.
TORCH_API void setDeviceIndex(c10::DeviceIndex device_index);
// Return the current device index.
TORCH_API c10::DeviceIndex getDeviceIndex();
// Make `stream` the current stream. Note that this API does not change the
// current device index.
TORCH_API void setCurrentStream(c10::Stream stream);
// Return the current stream of the device identified by `device_index`.
TORCH_API c10::Stream getCurrentStream(c10::DeviceIndex device_index);
// Block the calling thread until all the work previously enqueued on the
// device identified by `device_index` has been completed.
TORCH_API void synchronizeDevice(c10::DeviceIndex device_index);
// Make `device_index` the current device index and return the device index
// that was current before the change.
TORCH_API c10::DeviceIndex exchangeDevice(c10::DeviceIndex device_index);
// Like exchangeDevice(): make `device_index` the current device index and
// return the device index that was current before the change. Unlike
// exchangeDevice(), this avoids creating a new context if the context for
// `device_index` is not initialized.
// NOTE(review): what exactly happens to the current device index in the
// uninitialized-context case is not visible from this header — confirm
// against the backend implementation.
TORCH_API c10::DeviceIndex maybeExchangeDevice(c10::DeviceIndex device_index);
// Return the device capability of the device identified by `device_index`.
TORCH_API c10::DeviceCapability getDeviceCapability(
c10::DeviceIndex device_index);
// Calls emptyCache() on the allocator that at::getDeviceAllocator returns for
// the current accelerator's device type.
TORCH_API inline void emptyCache() {
  // getAccelerator is called with checked=true, for which it documents that
  // the returned optional always holds a value.
  at::getDeviceAllocator(getAccelerator(/*checked=*/true).value())
      ->emptyCache();
}
// Returns the DeviceStats that the current accelerator's allocator (as
// returned by at::getDeviceAllocator) reports for `device_index`.
TORCH_API inline at::CachingDeviceAllocator::DeviceStats getDeviceStats(
    c10::DeviceIndex device_index) {
  // getAccelerator is called with checked=true, for which it documents that
  // the returned optional always holds a value.
  return at::getDeviceAllocator(getAccelerator(/*checked=*/true).value())
      ->getDeviceStats(device_index);
}
// Calls resetAccumulatedStats(device_index) on the allocator that
// at::getDeviceAllocator returns for the current accelerator's device type.
TORCH_API inline void resetAccumulatedStats(c10::DeviceIndex device_index) {
  // getAccelerator is called with checked=true, for which it documents that
  // the returned optional always holds a value.
  at::getDeviceAllocator(getAccelerator(/*checked=*/true).value())
      ->resetAccumulatedStats(device_index);
}
// Calls resetPeakStats(device_index) on the allocator that
// at::getDeviceAllocator returns for the current accelerator's device type.
TORCH_API inline void resetPeakStats(c10::DeviceIndex device_index) {
  // getAccelerator is called with checked=true, for which it documents that
  // the returned optional always holds a value.
  at::getDeviceAllocator(getAccelerator(/*checked=*/true).value())
      ->resetPeakStats(device_index);
}
// Returns the pair that the current accelerator's allocator (as returned by
// at::getDeviceAllocator) reports from getMemoryInfo(device_index).
// NOTE(review): the pair is presumably (free, total) memory in bytes — confirm
// against the allocator interface, which is not visible in this header.
TORCH_API inline std::pair<size_t, size_t> getMemoryInfo(
    c10::DeviceIndex device_index) {
  // getAccelerator is called with checked=true, for which it documents that
  // the returned optional always holds a value.
  return at::getDeviceAllocator(getAccelerator(/*checked=*/true).value())
      ->getMemoryInfo(device_index);
}
} // namespace at::accelerator
namespace at {
// Kept for backward compatibility (BC) only: these using-declarations make
// getAccelerator and isAccelerator from at::accelerator nameable directly as
// at::getAccelerator and at::isAccelerator.
using at::accelerator::getAccelerator;
using at::accelerator::isAccelerator;
} // namespace at