Commit 3a418ae

change licensing with meta. Tensor is torch.Tensor. remove __future__.
1 parent 1edaadb commit 3a418ae


12 files changed: +75 -95 lines changed


src/anomalib/models/components/dinov2/dinov2_loader.py

Lines changed: 0 additions & 2 deletions
@@ -27,8 +27,6 @@
 downstream anomaly detection tasks.
 """
 
-from __future__ import annotations
-
 import logging
 from pathlib import Path
 from typing import ClassVar

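Note: the only change in this file is dropping `from __future__ import annotations`. A minimal sketch (the `WeightLoader` class below is hypothetical, not code from this commit) of why the annotation styles used in these modules no longer need the future import, assuming the project targets Python 3.10+:

from typing import ClassVar


class WeightLoader:
    """Hypothetical loader illustrating annotations that work without the future import."""

    # Built-in generics in annotations are valid at runtime on Python >= 3.9.
    registry: ClassVar[dict[str, str]] = {}

    def load(self, name: str, cache_dir: str | None = None) -> str:
        # PEP 604 unions (`str | None`) evaluate natively on Python >= 3.10.
        return self.registry.get(name, cache_dir or "~/.cache/torch/hub")
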
src/anomalib/models/components/dinov2/layers/attention.py

Lines changed: 6 additions & 6 deletions
@@ -1,5 +1,7 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2025 Meta Platforms, Inc. and affiliates.
+# SPDX-License-Identifier: Apache-2.0
 
 """Attention layers for DINOv2 Vision Transformers.
 
@@ -11,12 +13,10 @@
 blocks for feature extraction and masked modeling.
 """
 
-from __future__ import annotations
-
 import logging
 
 import torch
-from torch import Tensor, nn
+from torch import nn
 from torch.nn import functional as F  # noqa: N812
 
 logger = logging.getLogger(__name__)
@@ -82,15 +82,15 @@ def init_weights(
         if self.proj.bias is not None:
             nn.init.zeros_(self.proj.bias)
 
-    def forward(self, x: Tensor, is_causal: bool = False) -> Tensor:
+    def forward(self, x: torch.Tensor, is_causal: bool = False) -> torch.Tensor:
         """Apply multi-head self-attention.
 
         Args:
             x: Input sequence of shape ``(B, N, C)``.
             is_causal: If True, applies causal masking.
 
         Returns:
-            Tensor of shape ``(B, N, C)`` containing attended features.
+            torch.Tensor of shape ``(B, N, C)`` containing attended features.
         """
         b, n, c = x.shape
         qkv = self.qkv(x).reshape(b, n, 3, self.num_heads, c // self.num_heads)
@@ -120,7 +120,7 @@ class MemEffAttention(Attention):
     this implementation uses the scaled dot product from torch.
     """
 
-    def forward(self, x: Tensor, attn_bias: Tensor | None = None) -> Tensor:
+    def forward(self, x: torch.Tensor, attn_bias: torch.Tensor | None = None) -> torch.Tensor:
         """Compute memory-efficient attention using PyTorch's scaled dot product attention.
 
         Args:

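Note: `MemEffAttention`'s docstring says it relies on torch's scaled dot product attention, but its body is not part of the hunk. A minimal sketch of that pattern (the `SketchAttention` class, its defaults, and the head layout are assumptions, not the class in the diff):

import torch
from torch import nn
from torch.nn import functional as F  # noqa: N812


class SketchAttention(nn.Module):
    """Hypothetical minimal attention block built on torch's fused SDPA kernel."""

    def __init__(self, dim: int, num_heads: int = 8, qkv_bias: bool = True) -> None:
        super().__init__()
        self.num_heads = num_heads
        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x: torch.Tensor, attn_bias: torch.Tensor | None = None) -> torch.Tensor:
        b, n, c = x.shape
        # (B, N, 3, H, C//H) -> (3, B, H, N, C//H)
        qkv = self.qkv(x).reshape(b, n, 3, self.num_heads, c // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv.unbind(0)
        # Fused, memory-efficient attention; attn_bias acts as an optional additive mask.
        out = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_bias)
        return self.proj(out.transpose(1, 2).reshape(b, n, c))
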
src/anomalib/models/components/dinov2/layers/block.py

Lines changed: 14 additions & 22 deletions
@@ -1,35 +1,27 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2025 Meta Platforms, Inc. and affiliates.
+# SPDX-License-Identifier: Apache-2.0
+
 
 """Transformer blocks used in DINOv2 Vision Transformers.
 
 This module implements:
 - Standard transformer blocks with attention and MLP (`Block`)
 - Causal attention blocks (`CausalAttentionBlock`)
-
-The implementation is adapted from the original DINO and timm Vision
-Transformer code:
-
-- https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
-- https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
 """
 
-from __future__ import annotations
-
 import logging
-from typing import TYPE_CHECKING
+from collections.abc import Callable
 
 import torch
-from torch import Tensor, nn
+from torch import nn
 
 from .attention import Attention
 from .drop_path import DropPath
 from .layer_scale import LayerScale
 from .mlp import Mlp
 
-if TYPE_CHECKING:
-    from collections.abc import Callable
-
 logger = logging.getLogger("dinov2")
 
 
@@ -66,7 +58,7 @@ def __init__(
         ffn_bias: bool = True,
         drop: float = 0.0,
         attn_drop: float = 0.0,
-        init_values: float | Tensor | None = None,
+        init_values: float | torch.Tensor | None = None,
         drop_path: float = 0.0,
         act_layer: Callable[..., nn.Module] = nn.GELU,
         norm_layer: Callable[..., nn.Module] = nn.LayerNorm,
@@ -101,13 +93,13 @@ def __init__(
 
         self.sample_drop_ratio: float = drop_path
 
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply attention and MLP residual blocks with optional stochastic depth."""
 
-        def attn_residual_func(inp: Tensor) -> Tensor:
+        def attn_residual_func(inp: torch.Tensor) -> torch.Tensor:
             return self.ls1(self.attn(self.norm1(inp)))
 
-        def ffn_residual_func(inp: Tensor) -> Tensor:
+        def ffn_residual_func(inp: torch.Tensor) -> torch.Tensor:
             return self.ls2(self.mlp(self.norm2(inp)))
 
         if self.training and self.sample_drop_ratio > 0.1:
@@ -200,17 +192,17 @@ def init_weights(
         nn.init.normal_(self.feed_forward.fc2.weight, std=init_proj_std)
         self.ffn_norm.reset_parameters()
 
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply causal attention followed by a feed-forward block."""
         x_attn = x + self.ls1(self.attention(self.attention_norm(x), self.is_causal))
         return x_attn + self.ls2(self.feed_forward(self.ffn_norm(x_attn)))
 
 
 def drop_add_residual_stochastic_depth(
-    x: Tensor,
-    residual_func: Callable[[Tensor], Tensor],
+    x: torch.Tensor,
+    residual_func: Callable[[torch.Tensor], torch.Tensor],
     sample_drop_ratio: float = 0.0,
-) -> Tensor:
+) -> torch.Tensor:
     """Apply stochastic depth to a residual branch on a subset of samples.
 
     Args:
@@ -219,7 +211,7 @@ def drop_add_residual_stochastic_depth(
         sample_drop_ratio: Fraction of samples to drop for residual computation.
 
     Returns:
-        Tensor with residual added to a subset of samples.
+        torch.Tensor with residual added to a subset of samples.
     """
     b, _, _ = x.shape
     sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1)

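Note: only the first two lines of `drop_add_residual_stochastic_depth`'s body are visible above. A minimal sketch of the usual technique, assuming the residual branch runs on a random sample subset and is rescaled before being added back (the `_sketch` helper name and the `index_add` formulation are assumptions, not the function in the diff):

from collections.abc import Callable

import torch


def drop_add_residual_stochastic_depth_sketch(
    x: torch.Tensor,
    residual_func: Callable[[torch.Tensor], torch.Tensor],
    sample_drop_ratio: float = 0.0,
) -> torch.Tensor:
    """Hypothetical re-implementation: run the residual branch on a sample subset only."""
    b = x.shape[0]
    sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1)
    # Pick a random subset of the batch to run through the residual branch.
    brange = torch.randperm(b, device=x.device)[:sample_subset_size]
    residual = residual_func(x[brange])
    # Rescale so the expected residual contribution matches a full-batch pass.
    residual_scale_factor = b / sample_subset_size
    return torch.index_add(x, 0, brange, residual.to(x.dtype), alpha=residual_scale_factor)
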
src/anomalib/models/components/dinov2/layers/dino_head.py

Lines changed: 5 additions & 4 deletions
@@ -1,16 +1,17 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2025 Meta Platforms, Inc. and affiliates.
+# SPDX-License-Identifier: Apache-2.0
+
 
 """DINO projection head module.
 
 Reference:
     https://github.com/facebookresearch/dinov2/blob/main/dinov2/layers/dino_head.py
 """
 
-from __future__ import annotations
-
 import torch
-from torch import Tensor, nn
+from torch import nn
 from torch.nn.init import trunc_normal_
 from torch.nn.utils import weight_norm
 
@@ -68,7 +69,7 @@ def _init_weights(self, module: nn.Module) -> None:  # noqa: PLR6301
         if module.bias is not None:
             nn.init.constant_(module.bias, 0)
 
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Run the DINO projection head forward pass."""
         x = self.mlp(x)

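Note: the head itself is not shown beyond the first line of `forward`. A minimal sketch of a DINO-style projection head, assuming an MLP bottleneck, L2 normalization, and a `weight_norm`-wrapped final linear layer (the `SketchDINOHead` name and all sizes are assumptions, not the class in the diff):

import torch
from torch import nn
from torch.nn.utils import weight_norm


class SketchDINOHead(nn.Module):
    """Hypothetical projection head: MLP -> L2 normalization -> weight-normalized prototypes."""

    def __init__(self, in_dim: int, out_dim: int, hidden_dim: int = 2048, bottleneck_dim: int = 256) -> None:
        super().__init__()
        self.mlp = nn.Sequential(
            nn.Linear(in_dim, hidden_dim),
            nn.GELU(),
            nn.Linear(hidden_dim, bottleneck_dim),
        )
        self.last_layer = weight_norm(nn.Linear(bottleneck_dim, out_dim, bias=False))
        # Freeze the weight-norm magnitude so only the direction is learned.
        self.last_layer.weight_g.data.fill_(1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.mlp(x)
        eps = 1e-6 if x.dtype == torch.float16 else 1e-12
        x = nn.functional.normalize(x, dim=-1, p=2, eps=eps)
        return self.last_layer(x)
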
src/anomalib/models/components/dinov2/layers/drop_path.py

Lines changed: 7 additions & 6 deletions
@@ -1,5 +1,7 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2025 Meta Platforms, Inc. and affiliates.
+# SPDX-License-Identifier: Apache-2.0
 
 """Stochastic depth drop-path implementation used in DINOv2.
 
@@ -8,12 +10,11 @@
 drops entire residual branches during training to improve model robustness.
 """
 
-from __future__ import annotations
-
-from torch import Tensor, nn
+import torch
+from torch import nn
 
 
-def drop_path(x: Tensor, drop_prob: float = 0.0, training: bool = False) -> Tensor:
+def drop_path(x: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
     """Apply stochastic depth to an input tensor.
 
     Args:
@@ -22,7 +23,7 @@ def drop_path(x: Tensor, drop_prob: float = 0.0, training: bool = False) -> Tens
         training: Whether the module is in training mode.
 
     Returns:
-        Tensor with dropped paths applied during training, or the original
+        torch.Tensor with dropped paths applied during training, or the original
         tensor during evaluation.
 
     Notes:
@@ -57,6 +58,6 @@ def __init__(self, drop_prob: float | None = None) -> None:
         super().__init__()
        self.drop_prob = drop_prob if drop_prob is not None else 0.0
 
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass applying stochastic depth."""
         return drop_path(x, drop_prob=self.drop_prob, training=self.training)

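Note: `drop_path`'s body is not part of the hunk. A minimal sketch of the conventional stochastic-depth computation the docstring describes, assuming a per-sample Bernoulli keep mask with rescaling (the `drop_path_sketch` name is an assumption, not the function in the diff):

import torch


def drop_path_sketch(x: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """Hypothetical stochastic depth: zero out whole samples and rescale the survivors."""
    if drop_prob == 0.0 or not training:
        return x
    keep_prob = 1.0 - drop_prob
    # One Bernoulli draw per sample, broadcast over all remaining dimensions.
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
    if keep_prob > 0.0:
        random_tensor.div_(keep_prob)
    return x * random_tensor
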
src/anomalib/models/components/dinov2/layers/layer_scale.py

Lines changed: 5 additions & 5 deletions
@@ -1,5 +1,7 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2025 Meta Platforms, Inc. and affiliates.
+# SPDX-License-Identifier: Apache-2.0
 
 """LayerScale module used in DINOv2.
 
@@ -8,10 +10,8 @@
 Vision Transformers with residual connections.
 """
 
-from __future__ import annotations
-
 import torch
-from torch import Tensor, nn
+from torch import nn
 
 
 class LayerScale(nn.Module):
@@ -33,7 +33,7 @@ class LayerScale(nn.Module):
     def __init__(
         self,
        dim: int,
-        init_values: float | Tensor = 1e-5,
+        init_values: float | torch.Tensor = 1e-5,
         inplace: bool = False,
         device: torch.device | None = None,
         dtype: torch.dtype | None = None,
@@ -48,6 +48,6 @@ def reset_parameters(self) -> None:
         """Reset scale parameters to their initialization values."""
         nn.init.constant_(self.gamma, self.init_values)
 
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply channel-wise scaling to the input tensor."""
         return x.mul_(self.gamma) if self.inplace else x * self.gamma

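Note: the visible `forward` multiplies by `self.gamma`. A minimal self-contained sketch plus usage, assuming `gamma` is an `nn.Parameter` of shape `(dim,)` initialized from `init_values` (the `SketchLayerScale` name is an assumption, not the class in the diff):

import torch
from torch import nn


class SketchLayerScale(nn.Module):
    """Hypothetical minimal LayerScale: one learnable scale per channel."""

    def __init__(self, dim: int, init_values: float | torch.Tensor = 1e-5, inplace: bool = False) -> None:
        super().__init__()
        self.inplace = inplace
        self.gamma = nn.Parameter(init_values * torch.ones(dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Channel-wise scaling, broadcast over batch and token dimensions.
        return x.mul_(self.gamma) if self.inplace else x * self.gamma


tokens = torch.randn(2, 16, 384)            # (batch, tokens, channels)
print(SketchLayerScale(384)(tokens).shape)  # torch.Size([2, 16, 384])
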
src/anomalib/models/components/dinov2/layers/mlp.py

Lines changed: 6 additions & 8 deletions
@@ -1,5 +1,7 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2025 Meta Platforms, Inc. and affiliates.
+# SPDX-License-Identifier: Apache-2.0
 
 """Feed-forward MLP block used in DINOv2 Vision Transformers.
 
@@ -8,14 +10,10 @@
 inside each transformer block.
 """
 
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from torch import Tensor, nn
+from collections.abc import Callable
 
-if TYPE_CHECKING:
-    from collections.abc import Callable
+import torch
+from torch import nn
 
 
 class Mlp(nn.Module):
@@ -51,7 +49,7 @@ def __init__(
         self.fc2 = nn.Linear(hidden_features, out_features, bias=bias)
         self.drop = nn.Dropout(drop)
 
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply the two-layer feed-forward transformation."""
         x = self.fc1(x)
         x = self.act(x)

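Note: `Callable` now comes from `collections.abc` at runtime instead of a `TYPE_CHECKING` block. A minimal sketch of an MLP with a `Callable[..., nn.Module]` activation factory, mirroring the visible `fc1`/`fc2`/`drop` structure (the `SketchMlp` name and its defaults are assumptions, not the class in the diff):

from collections.abc import Callable

import torch
from torch import nn


class SketchMlp(nn.Module):
    """Hypothetical two-layer feed-forward block: Linear -> act -> drop -> Linear -> drop."""

    def __init__(
        self,
        in_features: int,
        hidden_features: int | None = None,
        out_features: int | None = None,
        act_layer: Callable[..., nn.Module] = nn.GELU,
        drop: float = 0.0,
        bias: bool = True,
    ) -> None:
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features, bias=bias)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features, bias=bias)
        self.drop = nn.Dropout(drop)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.drop(self.fc2(self.drop(self.act(self.fc1(x)))))
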
src/anomalib/models/components/dinov2/layers/patch_embed.py

Lines changed: 6 additions & 8 deletions
@@ -1,5 +1,7 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2025 Meta Platforms, Inc. and affiliates.
+# SPDX-License-Identifier: Apache-2.0
 
 """Patch embedding module for DINOv2 Vision Transformers.
 
@@ -8,14 +10,10 @@
 optional output reshaping, and optional normalization.
 """
 
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from torch import Tensor, nn
+from collections.abc import Callable
 
-if TYPE_CHECKING:
-    from collections.abc import Callable
+import torch
+from torch import nn
 
 
 def make_2tuple(x: int | tuple[int, int]) -> tuple[int, int]:
@@ -79,7 +77,7 @@ def __init__(
         self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_hw, stride=patch_hw)
         self.norm = norm_layer(embed_dim) if norm_layer is not None else nn.Identity()
 
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Embed the input image into patch tokens."""
         _, _, h, w = x.shape
         patch_h, patch_w = self.patch_size

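Note: the hunk shows the non-overlapping `Conv2d` projection; the flattening into a token sequence is only hinted at by the visible `forward` prologue. A minimal sketch under that assumption (the `SketchPatchEmbed` name and default sizes are assumptions, not the class in the diff):

import torch
from torch import nn


class SketchPatchEmbed(nn.Module):
    """Hypothetical patch embedding: non-overlapping Conv2d, then flatten to tokens."""

    def __init__(self, patch_size: int = 14, in_chans: int = 3, embed_dim: int = 384) -> None:
        super().__init__()
        self.patch_size = (patch_size, patch_size)
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        _, _, h, w = x.shape
        patch_h, patch_w = self.patch_size
        # The image must tile exactly into patches.
        assert h % patch_h == 0 and w % patch_w == 0, "image size must be divisible by patch size"
        x = self.proj(x)                     # (B, C, H/p, W/p)
        return x.flatten(2).transpose(1, 2)  # (B, N, C) token sequence


print(SketchPatchEmbed()(torch.randn(1, 3, 224, 224)).shape)  # torch.Size([1, 256, 384])
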
src/anomalib/models/components/dinov2/layers/swiglu_ffn.py

Lines changed: 7 additions & 9 deletions
@@ -1,5 +1,7 @@
 # Copyright (C) 2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
+# Copyright (C) 2025 Meta Platforms, Inc. and affiliates.
+# SPDX-License-Identifier: Apache-2.0
 
 """SwiGLU-based feed-forward layers used in DINOv2.
 
@@ -12,15 +14,11 @@
 These layers are used as transformer FFN blocks in DINOv2 models.
 """
 
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
+from collections.abc import Callable
 
+import torch
 import torch.nn.functional as F  # noqa: N812
-from torch import Tensor, nn
-
-if TYPE_CHECKING:
-    from collections.abc import Callable
+from torch import nn
 
 
 class SwiGLUFFN(nn.Module):
@@ -55,7 +53,7 @@ def __init__(
         self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias)
         self.w3 = nn.Linear(hidden_features, out_features, bias=bias)
 
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply the SwiGLU feed-forward transformation."""
         x12 = self.w12(x)
         x1, x2 = x12.chunk(2, dim=-1)
@@ -138,7 +136,7 @@ def __init__(
         self.w2 = nn.Linear(in_features, hidden_aligned, bias=bias, device=device)
         self.w3 = nn.Linear(hidden_aligned, out_features, bias=bias, device=device)
 
-    def forward(self, x: Tensor) -> Tensor:
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply aligned SwiGLU feed-forward transformation."""
         x1 = self.w1(x)
         x2 = self.w2(x)

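Note: the visible `SwiGLUFFN.forward` projects with `w12`, splits the result, and maps through `w3`. A minimal sketch assuming the standard SwiGLU gate `silu(x1) * x2` (the `SketchSwiGLUFFN` name is an assumption, not the class in the diff):

import torch
import torch.nn.functional as F  # noqa: N812
from torch import nn


class SketchSwiGLUFFN(nn.Module):
    """Hypothetical SwiGLU feed-forward: fused gate/value projection, SiLU gating."""

    def __init__(self, in_features: int, hidden_features: int, out_features: int | None = None, bias: bool = True) -> None:
        super().__init__()
        out_features = out_features or in_features
        self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias)
        self.w3 = nn.Linear(hidden_features, out_features, bias=bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x12 = self.w12(x)
        x1, x2 = x12.chunk(2, dim=-1)
        # SwiGLU gate: SiLU on the first half, elementwise product with the second half.
        return self.w3(F.silu(x1) * x2)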