diff --git a/test/dtypes/test_nf4.py b/test/dtypes/test_nf4.py index a42a209a38..03b41c1d4d 100644 --- a/test/dtypes/test_nf4.py +++ b/test/dtypes/test_nf4.py @@ -757,7 +757,7 @@ def world_size(self) -> int: return 2 @skip_if_lt_x_gpu(2) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need GPU available") def test_comm(self): self.run_subtests( {"input_size": [512, 2048]}, diff --git a/test/quantization/quantize_/workflows/int8/test_int8_tensor.py b/test/quantization/quantize_/workflows/int8/test_int8_tensor.py index 7f510cedbf..9d2c2ac437 100644 --- a/test/quantization/quantize_/workflows/int8/test_int8_tensor.py +++ b/test/quantization/quantize_/workflows/int8/test_int8_tensor.py @@ -26,7 +26,7 @@ from torchao.quantization.utils import compute_error, get_block_size from torchao.testing.model_architectures import ToyTwoLinearModel from torchao.testing.utils import TorchAOIntegrationTestCase -from torchao.utils import torch_version_at_least +from torchao.utils import get_current_accelerator_device, torch_version_at_least INT8_TEST_CONFIGS = [ Int8WeightOnlyConfig(version=2, granularity=PerTensor()), @@ -38,9 +38,10 @@ version=2, granularity=PerRow(), act_mapping_type=MappingType.SYMMETRIC ), ] +_DEVICE = get_current_accelerator_device() -@unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") +@unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @common_utils.instantiate_parametrized_tests class TestInt8Tensor(TorchAOIntegrationTestCase): def setUp(self): @@ -60,7 +61,7 @@ def test_creation_and_attributes(self, config): self.test_shape[0], bias=False, dtype=self.dtype, - device="cuda", + device=_DEVICE, ) quantize_(linear, config) @@ -99,8 +100,8 @@ def test_int8_linear_variants( torch.compiler.reset() M, N, K = sizes - input_tensor = torch.randn(*M, K, dtype=dtype, device="cuda") - model = ToyTwoLinearModel(K, N, K, dtype=dtype, device="cuda").eval() + input_tensor = torch.randn(*M, K, dtype=dtype, device=_DEVICE) + model = ToyTwoLinearModel(K, N, K, dtype=dtype, device=_DEVICE).eval() model_q = copy.deepcopy(model) quantize_(model_q, config) @@ -128,7 +129,7 @@ def test_int8_linear_variants( ) @common_utils.parametrize("config", INT8_TEST_CONFIGS) - @common_utils.parametrize("device", ["cpu", "cuda"]) + @common_utils.parametrize("device", ["cpu", _DEVICE]) @common_utils.parametrize("dtype", [torch.bfloat16, torch.float16]) def test_slice(self, config, device, dtype): """Test tensor slicing with per-row quantization""" @@ -159,8 +160,8 @@ def test_slice(self, config, device, dtype): def test_index_select(self, config): """test that `x_0 = x[0]` works when `x` is a 2D quantized tensor.""" N, K = 256, 512 - x = torch.randn(N, K, device="cuda", dtype=torch.bfloat16) - linear = torch.nn.Linear(K, N, bias=False, dtype=torch.bfloat16, device="cuda") + x = torch.randn(N, K, device=_DEVICE, dtype=torch.bfloat16) + linear = torch.nn.Linear(K, N, bias=False, dtype=torch.bfloat16, device=_DEVICE) linear.weight.data = x quantize_(linear, config) @@ -187,7 +188,7 @@ def test_index_select(self, config): def test_dequantization_accuracy(self, config): """Test dequantization accuracy separately""" linear = torch.nn.Linear( - 256, 512, bias=False, dtype=torch.bfloat16, device="cuda" + 256, 512, bias=False, dtype=torch.bfloat16, device=_DEVICE ) weight_fp = copy.deepcopy(linear.weight) quantize_(linear, config) @@ -208,14 +209,14 @@ def test_available_gpu_kernels(self): M, K, N = 128, 256, 512 m = torch.nn.Sequential( - torch.nn.Linear(K, N, device="cuda", dtype=torch.bfloat16) + torch.nn.Linear(K, N, device=_DEVICE, dtype=torch.bfloat16) ) config = Int8DynamicActivationInt8WeightConfig(version=2) quantize_(m, config) m = torch.compile(m) - x = torch.randn(M, K, device="cuda", dtype=torch.bfloat16) + x = torch.randn(M, K, device=_DEVICE, dtype=torch.bfloat16) out, code = run_and_get_code(m, x) @@ -248,7 +249,7 @@ def test_pin_memory(self, config): ) -@unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") +@unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @common_utils.instantiate_parametrized_tests class TestInt8StaticQuant(TorchAOIntegrationTestCase): @common_utils.parametrize("granularity", [PerRow(), PerTensor()]) @@ -257,9 +258,9 @@ def test_static_activation_per_row_int8_weight(self, granularity, dtype): torch.compiler.reset() M, N, K = 32, 32, 32 - input_tensor = torch.randn(M, K, dtype=dtype, device="cuda") + input_tensor = torch.randn(M, K, dtype=dtype, device=_DEVICE) - model = torch.nn.Linear(K, N, bias=False).eval().to(device="cuda", dtype=dtype) + model = torch.nn.Linear(K, N, bias=False).eval().to(device=_DEVICE, dtype=dtype) model_static_quant = copy.deepcopy(model) model_dynamic_quant = copy.deepcopy(model)