====== Forcibly Occupying 20 GB of VRAM ======
For certain specific reasons, we must occupy 20 GB of VRAM.
<code python use_gpu.py>
import torch
import time

# Hold the allocation on the first CUDA device.
device = torch.device("cuda:0")

# Total VRAM on the device, in bytes (used for the sanity check below).
total_memory = torch.cuda.get_device_properties(device).total_memory

# Amount to occupy: 20 GB.
target_memory = 20 * 1024**3
assert target_memory <= total_memory, "device has less than 20 GB of VRAM"

# float32 elements take 4 bytes each.
num_elements = target_memory // 4

# Creating the tensor claims the VRAM from the CUDA allocator.
tensor = torch.randn(num_elements, dtype=torch.float32, device=device)

# Sleep effectively forever so the VRAM stays occupied; kill the
# process (or Ctrl+C) to release it.
time.sleep(3600000000)

# Only reached if the sleep ever returns.
del tensor
torch.cuda.empty_cache()
</code>
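
Run the script with ''python use_gpu.py''; the process then sleeps while holding the allocation, so the 20 GB stays attributed to it in ''nvidia-smi'' until the process is killed.

The version above hard-codes 20 GB on ''cuda:0''. When the amount or the target GPU needs to vary, the same idea can be parameterized from the command line. The sketch below assumes only PyTorch; the ''--memory''/''--gpu'' flag names, the size-string parser, and the roughly 4 GB chunking are choices of this example, not a fixed recipe:

<code python>
import argparse
from math import ceil

import torch

def parse_memory_size(memory_str):
    """Convert a size string such as '20GB' or '512MB' to bytes."""
    memory_str = memory_str.strip().upper()
    if memory_str.endswith("GB"):
        return int(float(memory_str[:-2]) * 1024**3)
    if memory_str.endswith("MB"):
        return int(float(memory_str[:-2]) * 1024**2)
    raise ValueError("Unsupported unit! Use 'GB' or 'MB'.")

def allocate_vram(memory_str="20GB", gpu_id=0):
    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available")

    target_bytes = parse_memory_size(memory_str)
    total_bytes = torch.cuda.get_device_properties(gpu_id).total_memory
    if target_bytes > total_bytes:
        raise RuntimeError("Requested size exceeds the device's total VRAM")

    element_size = 4                      # float32 = 4 bytes per element
    total_elements = ceil(target_bytes / element_size)

    # Allocate in ~4 GB chunks instead of one huge tensor.
    chunk_size = 10**9
    tensors = []
    allocated = 0
    while allocated < total_elements:
        current = min(chunk_size, total_elements - allocated)
        tensors.append(torch.randn(current, dtype=torch.float32,
                                   device=f"cuda:{gpu_id}",
                                   requires_grad=False))
        allocated += current

    print(f"Allocated: {allocated * element_size / 1024**3:.2f} GB")
    print(f"memory_allocated: {torch.cuda.memory_allocated(gpu_id) / 1024**3:.2f} GB")
    print(f"memory_reserved: {torch.cuda.memory_reserved(gpu_id) / 1024**3:.2f} GB")

    # Hold the allocation until the user confirms, then release it.
    input("Press Enter to release the VRAM...")
    del tensors
    torch.cuda.empty_cache()

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Occupy a given amount of GPU VRAM")
    parser.add_argument("--memory", default="20GB", help="amount to occupy, e.g. 20GB or 512MB")
    parser.add_argument("--gpu", type=int, default=0, help="GPU id to allocate on")
    args = parser.parse_args()
    allocate_vram(args.memory, args.gpu)
</code>

Running it with, say, ''--memory 10GB --gpu 1'' would hold 10 GB on the second GPU and release it when Enter is pressed.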