Source code for hello.utils.cuda
import timeit
import torch
import torch.utils.benchmark as benchmark
def empty_cache():
    """Ask PyTorch to empty its CUDA memory cache.

    Delegates directly to ``torch.cuda.empty_cache`` and returns ``None``.
    """
    torch.cuda.empty_cache()
def zeros(size=1024, device="cuda:0", times=500):
    """Allocate ``times`` int8 zero tensors of shape ``(size, size, size)``.

    All tensors are kept alive in a local list until the function returns, so
    the memory in use grows with every iteration. After each allocation the
    amount of CUDA memory allocated on the target device is printed in GiB.

    Args:
        size: Edge length of each cubic tensor; one tensor takes ``size**3``
            bytes (1 GiB at the default).
        device: Device on which the tensors are created.
        times: Number of tensors to allocate.

    Returns:
        None. The tensors are released when the function returns.
    """
    target = torch.device(device)
    # memory_allocated() without an argument reports the *current* CUDA
    # device, which is not necessarily the one we allocate on. Query the
    # target explicitly; fall back to the current device for non-CUDA targets,
    # where a CUDA statistic for `device` is not defined.
    stats_device = target if target.type == "cuda" else None

    held = []  # keeps every tensor referenced so none of them is freed early
    for _ in range(times):
        held.append(
            torch.zeros(size, size, size, dtype=torch.int8, device=target)
        )
        allocated_gb = torch.cuda.memory_allocated(stats_device) / (1024**3)
        print(f"memory allocated: {allocated_gb:04} GB")
def matmul(size=1024, device="cuda:0", times=500):
    """Multiply a random square matrix by itself ``times`` times.

    A single ``size`` x ``size`` matrix is drawn with ``torch.randn`` on
    ``device``; every product is computed and immediately discarded.

    Args:
        size: Number of rows and columns of the matrix.
        device: Device on which the matrix is created.
        times: How many products to compute.

    Returns:
        None.
    """
    operand = torch.randn(size, size, device=device)
    for _ in range(times):
        # The result is intentionally thrown away; only the work matters.
        operand.matmul(operand)
def timeit_copy(size=1024, device="cuda:0", times=500, mode="benchmark"):
    """Time copying an int8 CPU tensor of shape ``(size, size, size)`` to CUDA.

    ``device`` is made the current CUDA device with ``torch.cuda.set_device``
    and the timed statement is ``x.to('cuda')``.

    Args:
        size: Edge length of the cubic source tensor (``size**3`` bytes).
        device: CUDA device passed to ``torch.cuda.set_device``.
        times: Number of executions of the timed statement.
        mode: ``"benchmark"`` (case-insensitive) selects
            ``torch.utils.benchmark.Timer``; any other value selects
            ``timeit.Timer``.

    Returns:
        None. In benchmark mode the object returned by ``Timer.timeit`` is
        printed; otherwise the mean time per call is printed in microseconds.
    """
    torch.cuda.set_device(device)
    host_tensor = torch.zeros(size, size, size, dtype=torch.int8, device="cpu")

    use_torch_timer = mode.lower() == "benchmark"
    timer_module = benchmark if use_torch_timer else timeit
    if use_torch_timer:
        print("[INFO] Benchmarking with torch.utils.benchmark.Timer")
    else:
        print("[INFO] Benchmarking with timeit.Timer")

    timer = timer_module.Timer(
        stmt="x.to('cuda')",
        setup="import torch",
        globals={"x": host_tensor},
    )

    if use_torch_timer:
        print(timer.timeit(times))
        return

    # timeit returns total seconds; convert to microseconds per call.
    per_call_us = timer.timeit(times) / times * 1e6
    print(f"{per_call_us:>5.3f} us")
def timeit_matmul(size=1024, device="cuda:0", times=500, mode="benchmark"):
    """Time ``torch.matmul(x, x)`` for a square float32 tensor on CUDA.

    ``device`` is made the current CUDA device with ``torch.cuda.set_device``
    and a ``size`` x ``size`` random matrix is created on ``"cuda"``.

    Args:
        size: Number of rows and columns of the matrix.
        device: CUDA device passed to ``torch.cuda.set_device``.
        times: Number of executions of the timed statement.
        mode: ``"benchmark"`` (case-insensitive) selects
            ``torch.utils.benchmark.Timer``; any other value selects
            ``timeit.Timer``.

    Returns:
        None. In benchmark mode the object returned by ``Timer.timeit`` is
        printed; otherwise the mean time per call is printed in microseconds.
    """
    torch.cuda.set_device(device)
    x = torch.randn(size, size, dtype=torch.float32, device="cuda")

    use_torch_timer = mode.lower() == "benchmark"
    stmt = "torch.matmul(x, x)"
    setup = "import torch"

    if use_torch_timer:
        print("[INFO] Benchmarking with torch.utils.benchmark.Timer")
        timer_module = benchmark
    else:
        print("[INFO] Benchmarking with timeit.Timer")
        timer_module = timeit
        # CUDA kernels are launched asynchronously. Plain timeit would only
        # measure the launch overhead, so wait for the device inside the timed
        # statement. The setup performs one untimed, synchronized call so that
        # one-time initialization cost is not attributed to the measurement.
        # torch.utils.benchmark.Timer does both of these on its own.
        setup = f"{setup}; {stmt}; torch.cuda.synchronize()"
        stmt = f"{stmt}; torch.cuda.synchronize()"

    timer = timer_module.Timer(stmt=stmt, setup=setup, globals={"x": x})

    if use_torch_timer:
        print(timer.timeit(times))
    else:
        # timeit returns total seconds; convert to microseconds per call.
        print(f"{timer.timeit(times) / times * 1e6:>5.3f} us")