First version of belief propagation (BP) inference.

GiuseppeMarra · GiuseppeMarra · commit 3c877db8125f · 2025-11-26T17:51:52.000+01:00
# Many TODOs remain.
# Still to add: evidence / interventions.
diff --git a/examples/utilization/1_pgm/2_concept_bottleneck_model_bp/2_concept_bottleneck_model_bp.py b/examples/utilization/1_pgm/2_concept_bottleneck_model_bp/2_concept_bottleneck_model_bp.py
@@ -27,7 +27,7 @@ def main():
                                                                      torch.nn.Sigmoid()))
     b_cpd = ParametricCPD("b",
                                 parametrization=torch.nn.Sequential(torch.nn.Linear(emb_size, b.size),
-                                                                     torch.nn.Sigmoid()))
+                                                                     torch.nn.Softmax(dim=-1)))
     c_cpd = ParametricCPD("c",
                             parametrization=torch.nn.Sequential(torch.nn.Linear(a.size + b.size, c.size),
                                                                 torch.nn.Sigmoid()))
@@ -48,58 +48,5 @@ def main():
     print(results)
     exit()
 
-    print("Genotype Predictions (first 5 samples):")
-    print(results[:, 0][:5])
-    print("Smoking Predictions (first 5 samples):")
-    print(results[:, 1][:5])
-    print("Tar Predictions (first 5 samples):")
-    print(results[:, 2][:5])
-    print("Cancer Predictions (first 5 samples):")
-    print(results[:, 3][:5])
-
-    # Original predictions (observational)
-    original_results = inference_engine.query(
-        query_concepts=["genotype", "smoking", "tar", "cancer"],
-        evidence=initial_input
-    )
-
-    # Intervention: Force smoking to 0 (prevent smoking)
-    smoking_strategy_0 = DoIntervention(
-        model=concept_model.parametric_cpds,
-        constants=0.0
-    )
-    with intervention(
-            policies=UniformPolicy(out_features=1),
-            strategies=smoking_strategy_0,
-            target_concepts=["smoking"]
-    ):
-        intervened_results = inference_engine.query(
-            query_concepts=["genotype", "smoking", "tar", "cancer"],
-            evidence=initial_input
-        )
-        cancer_do_smoking_0 = intervened_results[:, 3]
-
-    # Intervention: Force smoking to 1 (promote smoking)
-    smoking_strategy_1 = DoIntervention(
-        model=concept_model.parametric_cpds,
-        constants=1.0
-    )
-    with intervention(
-            policies=UniformPolicy(out_features=1),
-            strategies=smoking_strategy_1,
-            target_concepts=["smoking"]
-    ):
-        intervened_results = inference_engine.query(
-            query_concepts=["genotype", "smoking", "tar", "cancer"],
-            evidence=initial_input
-        )
-        cancer_do_smoking_1 = intervened_results[:, 3]
-
-    ace_cancer_do_smoking = cace_score(cancer_do_smoking_0, cancer_do_smoking_1)
-    print(f"ACE of smoking on cancer: {ace_cancer_do_smoking:.3f}")
-
-    return
-
-
 if __name__ == "__main__":
     main()
diff --git a/examples/utilization/1_pgm/2_concept_bottleneck_model_bp/bp_with_conditional.py b/examples/utilization/1_pgm/2_concept_bottleneck_model_bp/bp_with_conditional.py
@@ -484,9 +484,10 @@ def compute_exact_marginals_bruteforce(
 
 class BPInference(BaseInference):
 
-    def __init__(self, model):
+    def __init__(self, model, iters = 5):
         super().__init__()
         self.model : ProbabilisticModel = model
+        self.iters = iters
 
 
         variables = {}
@@ -565,7 +566,7 @@ def query(self, query, evidence):
                 factor_eval_list.append(factor_eval)
                 continue
             else:
-                for i, p in enumerate(cpd.parents):
+                for i, p in enumerate(cpd.variable.parents):
 
                     if p.distribution is Delta:
                         emb = embeddings_dict[p.concepts[0]] # [B, emb_dim]
@@ -595,7 +596,7 @@ def query(self, query, evidence):
 
                 # turn into bidimentional tensor: [B * num_assignments, input_dim]
                 input = input.view(batch_size * num_assignments, -1)
-                evaluation = cpd.parameterization(input)
+                evaluation = cpd.parametrization(input)
 
                 # reshape back to [B, num_assignments, output_dim]
                 evaluation = evaluation.view(batch_size, num_assignments, -1)
@@ -604,40 +605,45 @@ def query(self, query, evidence):
                 # TODO: We need to turn them into factor evaluations. In each factor, the target variable of the CPD is the first variable in the scope so we can do a simple reshape
                 # TODO: check that this is the case
 
-                if cpd.distribution is RelaxedOneHotCategorical:
+                if cpd.variable.distribution is RelaxedOneHotCategorical:
                     #TODO: Check that it is concatenating the third dimension into the num_assignments dimension
-                    factor_eval = evaluation.view(batch_size, -1)
 
-                elif cpd.distribution is RelaxedBernoulli:
+                    # this is the tensorial equivalent to torch.cat([evaluation[:, :, i] for i in range(evaluation.shape[2])], dim=1)
+                    factor_eval = evaluation.permute(0, 2, 1).reshape(batch_size, -1)
+
+                elif cpd.variable.distribution is RelaxedBernoulli:
                     # Bernoulli output: need to create a factor eval of size 2
                     prob_1 = evaluation.view(batch_size, -1)
                     prob_0 = 1.0 - prob_1
                     factor_eval = torch.cat([prob_0, prob_1], dim=1)
-                elif cpd.distribution is Delta:
+                elif cpd.variable.distribution is Delta:
                     factor_eval = torch.ones([batch_size,1], device=evaluation.device)
                 else:
                     raise NotImplementedError("Unknown CPD distribution in CPD2FactorWrapper.")
 
                 factor_eval_list.append(factor_eval)
 
+        B = batch_size
+        S = self.metadata["total_edge_states"]
+        E = self.metadata["E"]
         messages_f2v_init = torch.rand(B, S)
 
-        edge_id = md["edge_id_per_state"]  # [S]
+        edge_id = self.metadata["edge_id_per_state"]  # [S]
         edge_id_b = edge_id.unsqueeze(0).expand(B, -1)  # [B, S]
         sum_per_edge = torch.zeros(B, E)
         sum_per_edge.scatter_add_(1, edge_id_b, messages_f2v_init)
         messages_f2v_init = messages_f2v_init / (sum_per_edge.gather(1, edge_id_b) + 1e-20)
 
         messages_f2v_uncond = messages_f2v_init.clone()
-        for it in range(num_iters):
+        for it in range(self.iters):
             messages_v2f_uncond = update_var_to_factor(
-                messages_f2v_uncond, md, evidence_logmask_vs=None
+                messages_f2v_uncond, self.metadata, evidence_logmask_vs=None
             )
             messages_f2v_uncond = update_factor_to_var(
-                messages_v2f_uncond, factor_eval_list, md
+                messages_v2f_uncond, factor_eval_list, self.metadata
             )
         bp_marginals_uncond = compute_var_marginals(
-            messages_f2v_uncond, md, evidence_logmask_vs=None
+            messages_f2v_uncond, self.metadata, evidence_logmask_vs=None
         )
 
         return bp_marginals_uncond