-
Notifications
You must be signed in to change notification settings - Fork 845
🚀 feat(model): add GLASS model into Anomalib #2629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feature/model/glass
Are you sure you want to change the base?
Changes from 20 commits
5b4931b
4789f49
050fd4c
cdd0984
381eec6
9b1c51a
161005c
3d78beb
617cf49
f9d3207
7fea20f
1beedf5
838bc50
1baa0b7
f066b3c
6e780b0
b1be6f5
20d97dd
d5affe4
f008537
a1097e5
7e9d4d4
44dcd60
da57095
ba5a6dd
714a3c3
1a3519c
9e12285
5466d46
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| # Copyright (C) 2025 Intel Corporation | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| """GLASS - Unsupervised anomaly detection via Gradient Ascent for Industrial Anomaly detection and localization. | ||
|
|
||
| This module implements the GLASS model for unsupervised anomaly detection and localization. GLASS synthesizes both | ||
| global and local anomalies using Gaussian noise guided by gradient ascent to enhance weak defect detection in | ||
| industrial settings. | ||
|
|
||
| The model consists of: | ||
| - A feature extractor and feature adaptor to obtain robust normal representations | ||
| - A Global Anomaly Synthesis (GAS) module that perturbs features using Gaussian noise and gradient ascent with | ||
| truncated projection | ||
| - A Local Anomaly Synthesis (LAS) module that overlays augmented textures onto images using Perlin noise masks | ||
| - A shared discriminator trained with features from normal, global, and local synthetic samples | ||
|
|
||
| Paper: `A Unified Anomaly Synthesis Strategy with Gradient Ascent for Industrial Anomaly Detection and Localization | ||
| <https://arxiv.org/pdf/2407.09359>`_ | ||
| """ | ||
|
|
||
| from .lightning_model import Glass | ||
|
|
||
| __all__ = ["Glass"] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,326 @@ | ||
| # Copyright (C) 2025 Intel Corporation | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| """GLASS - Unsupervised anomaly detection via Gradient Ascent for Industrial Anomaly detection and localization. | ||
code-dev05 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| This module implements the GLASS model for unsupervised anomaly detection and localization. GLASS synthesizes both | ||
| global and local anomalies using Gaussian noise guided by gradient ascent to enhance weak defect detection in | ||
| industrial settings. | ||
|
|
||
| The model consists of: | ||
| - A feature extractor and feature adaptor to obtain robust normal representations | ||
| - A Global Anomaly Synthesis (GAS) module that perturbs features using Gaussian noise and gradient ascent with | ||
| truncated projection | ||
| - A Local Anomaly Synthesis (LAS) module that overlays augmented textures onto images using Perlin noise masks | ||
| - A shared discriminator trained with features from normal, global, and local synthetic samples | ||
|
|
||
| Paper: `A Unified Anomaly Synthesis Strategy with Gradient Ascent for Industrial Anomaly Detection and Localization | ||
| <https://arxiv.org/pdf/2407.09359>`_ | ||
| """ | ||
|
|
||
| from typing import Any | ||
|
|
||
| from lightning.pytorch.utilities.types import STEP_OUTPUT | ||
| from torch import optim | ||
| from torchvision.transforms.v2 import CenterCrop, Compose, Normalize, Resize | ||
|
|
||
| from anomalib import LearningType | ||
| from anomalib.data import Batch | ||
| from anomalib.metrics import Evaluator | ||
| from anomalib.models.components import AnomalibModule | ||
| from anomalib.post_processing import PostProcessor | ||
| from anomalib.pre_processing import PreProcessor | ||
| from anomalib.visualization import Visualizer | ||
|
|
||
| from .torch_model import GlassModel | ||
|
|
||
|
|
||
| class Glass(AnomalibModule): | ||
code-dev05 marked this conversation as resolved.
Show resolved
Hide resolved
code-dev05 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| """PyTorch Lightning Implementation of the GLASS Model. | ||
|
|
||
| The model uses a pre-trained feature extractor to extract features and a feature adaptor to mitigate latent domain | ||
| bias. | ||
| Global anomaly features are synthesized from adapted normal features using gradient ascent. | ||
| Local anomaly images are synthesized using texture overlay datasets like dtd which are then processed by feature | ||
| extractor and feature adaptor. | ||
| All three different features are passed to the discriminator trained using loss functions. | ||
|
|
||
| Args: | ||
| input_shape (tuple[int, int]): Input image dimensions as a tuple of (height, width). Required for shaping the | ||
| input pipeline. | ||
| anomaly_source_path (str | None, optional): Path to the dataset or source directory containing normal images and anomaly textures. Defaults to `None`. | ||
| backbone (str, optional): Name of the CNN backbone used for feature extraction. | ||
| Defaults to `"resnet18"`. | ||
| pretrain_embed_dim (int, optional): Dimensionality of features extracted by the pre-trained backbone before | ||
| adaptation. | ||
| Defaults to `1024`. | ||
| target_embed_dim (int, optional): Dimensionality of the target adapted features after projection. | ||
| Defaults to `1024`. | ||
| patchsize (int, optional): Size of the local patch used in feature aggregation (e.g., for neighborhood pooling). | ||
| Defaults to `3`. | ||
| patchstride (int, optional): Stride used when extracting patches for local feature aggregation. | ||
| Defaults to `1`. | ||
| pre_trained (bool, optional): Whether to use ImageNet pre-trained weights for the backbone network. | ||
| Defaults to `True`. | ||
| layers (list[str], optional): List of backbone layers to extract features from. | ||
| Defaults to `["layer1", "layer2", "layer3"]`. | ||
| pre_projection (int, optional): Number of projection layers used in the feature adaptor (e.g., MLP before | ||
| discriminator). | ||
| Defaults to `1`. | ||
| discriminator_layers (int, optional): Number of layers in the discriminator network. | ||
| Defaults to `2`. | ||
| discriminator_hidden (int, optional): Number of hidden units in each discriminator layer. | ||
| Defaults to `1024`. | ||
| discriminator_margin (float, optional): Margin used for contrastive or binary classification loss in | ||
| discriminator training. | ||
| Defaults to `0.5`. | ||
| pre_processor (PreProcessor | bool, optional): Preprocessing module or flag to enable default preprocessing. | ||
| Set to `True` to apply default normalization and resizing. | ||
| Defaults to `True`. | ||
| post_processor (PostProcessor | bool, optional): Postprocessing module or flag to enable default output | ||
| smoothing or thresholding. | ||
| Defaults to `True`. | ||
| evaluator (Evaluator | bool, optional): Evaluation module for calculating metrics such as AUROC and PRO. | ||
| Defaults to `True`. | ||
| visualizer (Visualizer | bool, optional): Visualization module to generate heatmaps, segmentation overlays, and | ||
| anomaly scores. | ||
| Defaults to `True`. | ||
| mining (int, optional): Number of iterations or difficulty level for Online Hard Example Mining (OHEM) during | ||
| training. | ||
| Defaults to `1`. | ||
| noise (float, optional): Standard deviation of Gaussian noise used in feature-level anomaly synthesis. | ||
| Defaults to `0.015`. | ||
| radius (float, optional): Radius parameter used for truncated projection in the anomaly synthesis strategy. | ||
| Determines the range for valid synthetic anomalies in the hypersphere or manifold. | ||
| Defaults to `0.75`. | ||
| random_selection_prob (float, optional): Probability used in random selection logic, such as anomaly mask | ||
| generation or augmentation choice. | ||
| Defaults to `0.5`. | ||
| learning_rate (float, optional): Learning rate for training the feature adaptor and discriminator networks. | ||
| Defaults to `0.0001`. | ||
| step (int, optional): Number of gradient ascent steps for anomaly synthesis. | ||
| Defaults to `20`. | ||
| svd (int, optional): Flag to enable SVD-based feature projection. | ||
| Defaults to `0`. | ||
| """ | ||
|
|
||
def __init__(
    self,
    input_shape: tuple[int, int] = (256, 256),
    anomaly_source_path: str | None = None,
    backbone: str = "resnet18",
    pretrain_embed_dim: int = 1024,
    target_embed_dim: int = 1024,
    patchsize: int = 3,
    patchstride: int = 1,
    pre_trained: bool = True,
    layers: list[str] | None = None,
    pre_projection: int = 1,
    discriminator_layers: int = 2,
    discriminator_hidden: int = 1024,
    discriminator_margin: float = 0.5,
    # NOTE(review): a review comment at this point asked to move params to the
    # end of the signature for uniformity (presumably the pre_processor /
    # post_processor / evaluator / visualizer group). The order is left as-is
    # here so existing positional callers keep working.
    pre_processor: PreProcessor | bool = True,
    post_processor: PostProcessor | bool = True,
    evaluator: Evaluator | bool = True,
    visualizer: Visualizer | bool = True,
    mining: int = 1,
    noise: float = 0.015,
    radius: float = 0.75,
    random_selection_prob: float = 0.5,
    learning_rate: float = 0.0001,
    step: int = 20,
    svd: int = 0,
) -> None:
    """Build the GLASS torch model and the optimizers kept outside Lightning.

    Argument descriptions are given in the class docstring. Besides wiring the
    arguments into ``GlassModel``, this constructor:

    - switches the module to manual optimization, because ``training_step``
      calls ``zero_grad()``, ``backward()`` and ``step()`` itself;
    - creates an AdamW optimizer for the projection layer when
      ``pre_projection > 0`` (otherwise ``projection_opt`` is ``None``);
    - creates an AdamW optimizer for the backbone when ``pre_trained`` is
      ``False`` (otherwise ``backbone_opt`` is ``None``).
    """
    super().__init__(
        pre_processor=pre_processor,
        post_processor=post_processor,
        evaluator=evaluator,
        visualizer=visualizer,
    )

    # training_step drives every optimizer by hand. Without this flag
    # Lightning's automatic optimization loop would additionally wrap
    # training_step and step the optimizer returned by configure_optimizers.
    # Must be set after super().__init__().
    self.automatic_optimization = False

    # Avoid a mutable default argument: resolve the default layer list here.
    if layers is None:
        layers = ["layer1", "layer2", "layer3"]

    self.model = GlassModel(
        input_shape=input_shape,
        anomaly_source_path=anomaly_source_path,
        pretrain_embed_dim=pretrain_embed_dim,
        target_embed_dim=target_embed_dim,
        backbone=backbone,
        pre_trained=pre_trained,
        patchsize=patchsize,
        patchstride=patchstride,
        layers=layers,
        pre_projection=pre_projection,
        discriminator_layers=discriminator_layers,
        discriminator_hidden=discriminator_hidden,
        discriminator_margin=discriminator_margin,
        step=step,
        svd=svd,
        mining=mining,
        noise=noise,
        radius=radius,
        random_selection_prob=random_selection_prob,
    )

    self.learning_rate = learning_rate

    # Optimizer for the projection (feature adaptor) layer, only when one exists.
    if pre_projection > 0:
        self.projection_opt = optim.AdamW(
            self.model.projection.parameters(),
            lr=self.learning_rate,
            weight_decay=1e-5,
        )
    else:
        self.projection_opt = None

    # The backbone is only trained when it does not start from pre-trained weights.
    if not pre_trained:
        self.backbone_opt = optim.AdamW(
            self.model.forward_modules["feature_aggregator"].backbone.parameters(),
            lr=self.learning_rate,
        )
    else:
        self.backbone_opt = None
code-dev05 marked this conversation as resolved.
Show resolved
Hide resolved
ashwinvaidya17 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
@classmethod
def configure_pre_processor(
    cls,
    image_size: tuple[int, int] | None = None,
    center_crop_size: tuple[int, int] | None = None,
) -> PreProcessor:
    """Build the default GLASS pre-processor.

    The transform pipeline resizes the input (with antialiasing), optionally
    center-crops it, and finally normalizes it with the standard ImageNet
    channel mean and standard deviation.

    Args:
        image_size (tuple[int, int] | None, optional): Target ``(height, width)``
            for resizing. ``None`` (or any falsy value) falls back to
            ``(256, 256)``.
        center_crop_size (tuple[int, int] | None, optional): Size for center
            cropping. When ``None`` no crop is applied. Defaults to ``None``.

    Returns:
        PreProcessor: Pre-processor wrapping the composed transform.

    Raises:
        ValueError: If either dimension of ``center_crop_size`` exceeds the
            corresponding dimension of the resolved image size.

    Example:
        >>> pre_processor = Glass.configure_pre_processor(
        ...     image_size=(256, 256)
        ... )
        >>> transformed_image = pre_processor(image)
    """
    resolved_size = image_size or (256, 256)

    # Validate the crop before any transform is constructed.
    if center_crop_size is not None and (
        center_crop_size[0] > resolved_size[0] or center_crop_size[1] > resolved_size[1]
    ):
        msg = f"Center crop size {center_crop_size} cannot be larger than image size {resolved_size}."
        raise ValueError(msg)

    # Resize -> (optional) CenterCrop -> Normalize, assembled step by step.
    pipeline = [Resize(resolved_size, antialias=True)]
    if center_crop_size is not None:
        pipeline.append(CenterCrop(center_crop_size))
    pipeline.append(Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))

    return PreProcessor(transform=Compose(pipeline))
|
|
||
def configure_optimizers(self) -> optim.Optimizer:
    """Create the optimizer that Lightning manages for the discriminator.

    The discriminator is trained with twice the learning rate stored on the
    module (``self.learning_rate * 2``).

    Returns:
        Optimizer: AdamW optimizer over the discriminator parameters.
    """
    discriminator_params = self.model.discriminator.parameters()
    discriminator_lr = self.learning_rate * 2
    return optim.AdamW(discriminator_params, lr=discriminator_lr)
|
|
||
def training_step(self, batch: Batch, batch_idx: int) -> STEP_OUTPUT:
    """Run one hand-driven optimization step of the GLASS model.

    The step puts the forward modules in eval mode and the projection
    (if ``pre_projection > 0``) and discriminator in train mode, clears the
    gradients of every active optimizer, runs the model on ``batch.image``,
    back-propagates the combined loss, steps the optimizers and logs the
    individual loss terms to the progress bar.

    Args:
        batch (Batch): Input batch; only ``batch.image`` is used here.
        batch_idx (int): Index of the current batch (unused).

    Returns:
        STEP_OUTPUT: Nothing is returned explicitly (``None``); the losses
        are only reported through ``self.log``.
    """
    del batch_idx  # required by the hook signature, not needed here

    discriminator_opt = self.optimizers()
    # Optimizers created in __init__; each one is None when it is not needed.
    auxiliary_opts = [opt for opt in (self.projection_opt, self.backbone_opt) if opt is not None]

    # Module modes for training.
    self.model.forward_modules.eval()
    if self.model.pre_projection > 0:
        self.model.projection.train()
    self.model.discriminator.train()

    # Clear gradients: discriminator first, then the auxiliary optimizers.
    for optimizer in (discriminator_opt, *auxiliary_opts):
        optimizer.zero_grad()

    true_loss, gaus_loss, bce_loss, focal_loss, loss = self.model(batch.image)
    loss.backward()

    # Apply updates: auxiliary optimizers first, discriminator last.
    for optimizer in (*auxiliary_opts, discriminator_opt):
        optimizer.step()

    reported = {
        "true_loss": true_loss,
        "gaus_loss": gaus_loss,
        "bce_loss": bce_loss,
        "focal_loss": focal_loss,
        "loss": loss,
    }
    for metric_name, metric_value in reported.items():
        self.log(metric_name, metric_value, prog_bar=True)
|
|
||
def validation_step(self, batch: Batch, batch_idx: int) -> STEP_OUTPUT:
    """Run the model on one validation batch and attach its predictions.

    All sub-modules (forward modules, projection when ``pre_projection > 0``,
    discriminator) are switched to eval mode before inference.

    Args:
        batch (Batch): Input batch; ``batch.image`` is fed to the model.
        batch_idx (int): Index of the batch (unused).

    Returns:
        STEP_OUTPUT: The result of ``batch.update(...)`` called with the
        fields of the model output. The output must provide ``_asdict()``,
        i.e. it is expected to be namedtuple-like.
    """
    del batch_idx  # required by the hook signature, not needed here

    net = self.model
    net.forward_modules.eval()
    if net.pre_projection > 0:
        net.projection.eval()
    net.discriminator.eval()

    outputs = net(batch.image)
    return batch.update(**outputs._asdict())
|
|
||
def on_train_start(self) -> None:
    """Initialize the model's center from the training data before fitting.

    Hands the trainer's training dataloader and the module's device to
    ``GlassModel.calculate_center``. Presumably that computes a mean feature
    representation used as the reference for the normal class; the
    computation itself lives in the torch model.
    """
    self.model.calculate_center(self.trainer.train_dataloader, self.device)
|
|
||
@property
def learning_type(self) -> LearningType:
    """Learning paradigm of the model.

    Returns:
        LearningType: Always ``LearningType.ONE_CLASS`` for GLASS.
    """
    one_class = LearningType.ONE_CLASS
    return one_class
|
|
||
@property
def trainer_arguments(self) -> dict[str, Any]:
    """Trainer overrides requested by GLASS.

    Returns:
        dict[str, Any]: ``gradient_clip_val`` and ``num_sanity_val_steps``,
        both set to ``0`` (intended to turn off gradient clipping and the
        pre-training sanity validation run).
    """
    overrides: dict[str, Any] = {}
    overrides["gradient_clip_val"] = 0
    overrides["num_sanity_val_steps"] = 0
    return overrides

Uh oh!
There was an error while loading. Please reload this page.