Add tracking for new tensors, AQT and layouts (#2895)

jerryzh168 · web-flow · commit f02354d82463 · 2025-08-28T18:35:17.000-07:00
Summary:
Add API usage logging for the new tensor subclasses, AffineQuantizedTensor (AQT),
and layouts, so we can identify which model checkpoints use these APIs.

Test Plan:
internal queries

Reviewers:

Subscribers:

Tasks:

Tags:
diff --git a/torchao/dtypes/affine_quantized_tensor.py b/torchao/dtypes/affine_quantized_tensor.py
@@ -116,6 +116,7 @@ def __init__(
         dtype=None,
         strides=None,
     ):
+        torch._C._log_api_usage_once(str(type(self)))
         self.tensor_impl = tensor_impl
         self.block_size = block_size
         self.quant_min = quant_min
diff --git a/torchao/dtypes/utils.py b/torchao/dtypes/utils.py
@@ -68,6 +68,9 @@ def __repr__(self):
     def extra_repr(self) -> str:
         return ""
 
+    def __post_init__(self):
+        torch._C._log_api_usage_once(str(type(self)))
+
 
 @dataclass(frozen=True)
 class PlainLayout(Layout):
diff --git a/torchao/quantization/quantize_/workflows/float8/float8_tensor.py b/torchao/quantization/quantize_/workflows/float8/float8_tensor.py
@@ -136,6 +136,7 @@ def __init__(
         kernel_preference: KernelPreference = KernelPreference.AUTO,
         dtype: Optional[torch.dtype] = None,
     ):
+        super().__init__()
         self.qdata = qdata
         self.scale = scale
         self.block_size = block_size
diff --git a/torchao/quantization/quantize_/workflows/int4/int4_marlin_sparse_tensor.py b/torchao/quantization/quantize_/workflows/int4/int4_marlin_sparse_tensor.py
@@ -35,6 +35,7 @@ def __new__(cls, qdata, scale, zero_point, meta, block_size, num_bits, shape):
         return torch.Tensor._make_wrapper_subclass(cls, shape, **kwargs)  # type: ignore[attr-defined]
 
     def __init__(self, qdata, scale, zero_point, meta, block_size, num_bits, shape):
+        super().__init__()
         self.qdata = qdata
         self.scale = scale
         self.zero_point = zero_point
diff --git a/torchao/quantization/quantize_/workflows/int4/int4_opaque_tensor.py b/torchao/quantization/quantize_/workflows/int4/int4_opaque_tensor.py
@@ -70,6 +70,7 @@ def __init__(
         block_size: List[int],
         shape: torch.Size,
     ):
+        super().__init__()
         self.qdata = qdata
         self.scale_and_zero = scale_and_zero
         self.block_size = block_size
diff --git a/torchao/quantization/quantize_/workflows/int4/int4_preshuffled_tensor.py b/torchao/quantization/quantize_/workflows/int4/int4_preshuffled_tensor.py
@@ -102,6 +102,7 @@ def __init__(
         group_zero: Optional[torch.Tensor] = None,
         row_scale: Optional[torch.Tensor] = None,
     ):
+        super().__init__()
         # one and only one of group_scale and group_zero should be None
         assert group_zero is None or row_scale is None
         assert not (group_zero is not None and row_scale is not None)
diff --git a/torchao/quantization/quantize_/workflows/int4/int4_tensor.py b/torchao/quantization/quantize_/workflows/int4/int4_tensor.py
@@ -75,6 +75,7 @@ def __init__(
         shape: torch.Size,
         act_pre_scale: Optional[torch.Tensor] = None,
     ):
+        super().__init__()
         self.qdata = qdata
         self.scale = scale
         self.zero_point = zero_point
diff --git a/torchao/quantization/quantize_/workflows/intx/intx_opaque_tensor.py b/torchao/quantization/quantize_/workflows/intx/intx_opaque_tensor.py
@@ -37,7 +37,7 @@ class ComputeTarget(enum.Enum):
     ATEN = "aten"
 
     """
-    This packs the tensor for TorchAO CPU kernels by selecting the best available kernel 
+    This packs the tensor for TorchAO CPU kernels by selecting the best available kernel
     based on the quantization scheme, either using KlediAI kernels or lowbit kernels.
     It requires TorchAO C++ kernels to be installed.
     """
@@ -112,6 +112,7 @@ def __init__(
         packed_weights_has_bias,
         compute_target,
     ):
+        super().__init__()
         assert packed_weights.device == torch.device("cpu")
         self.packed_weights = packed_weights
         self.bit_width = bit_width
diff --git a/torchao/quantization/quantize_/workflows/intx/intx_unpacked_to_int8_tensor.py b/torchao/quantization/quantize_/workflows/intx/intx_unpacked_to_int8_tensor.py
@@ -93,6 +93,7 @@ def __init__(
         dtype,
         apply_int8_act_asym_per_token_quant,
     ):
+        super().__init__()
         assert qdata.dtype == torch.int8, (
             f"qdata dtype must be int8, but got {qdata.dtype}"
         )
diff --git a/torchao/utils.py b/torchao/utils.py
@@ -860,6 +860,9 @@ def __init_subclass__(cls, **kwargs):
     get_tensor_impl_constructor = classmethod(_get_tensor_impl_constructor)
     _get_to_kwargs = _get_to_kwargs
 
+    def __init__(self, *args, **kwargs):
+        torch._C._log_api_usage_once(str(type(self)))
+
     def __tensor_flatten__(self):
         if hasattr(self, "tensor_data_names") and hasattr(
             self, "tensor_attribute_names"

Original file line number	Diff line number	Diff line change
`@@ -93,6 +93,7 @@ def __init__(`
`93`	`93`	`dtype,`
`94`	`94`	`apply_int8_act_asym_per_token_quant,`
`95`	`95`	`):`
	`96`	`+ super().__init__()`
`96`	`97`	`assert qdata.dtype == torch.int8, (`
`97`	`98`	`f"qdata dtype must be int8, but got {qdata.dtype}"`
`98`	`99`	`)`