2 changes: 1 addition & 1 deletion .gitignore
@@ -14,7 +14,7 @@ notebooks/W-weights.pdf
 notebooks/predictions.pdf
 
 runs/
-data/
+#data/
 !notebooks/data/
 sweep_results/
 
22 changes: 11 additions & 11 deletions README.md
@@ -121,13 +121,13 @@ The repository includes preconfigured experiments for eight groups:
 | Group | Config | Order | k | Architecture |
 |:------|:-------|:-----:|:-:|:-------------|
 | Cyclic $C_{10}$ | `config_c10_k3.yaml` | 10 | 3 | SequentialMLP |
-| Cyclic $C_{11}$ | `config_c11.yaml` | 11 | 2 | TwoLayerNet |
+| Cyclic $C_{11}$ | `config_c11.yaml` | 11 | 2 | TwoLayerMLP |
 | Product $C_4 \times C_4$ | `config_c4x4_k3.yaml` | 16 | 3 | SequentialMLP |
-| Product $C_5 \times C_5$ | `config_c5xc5.yaml` | 25 | 2 | TwoLayerNet |
-| Dihedral $D_3$ | `config_d3.yaml` | 6 | 2 | TwoLayerNet |
-| Dihedral $D_5$ | `config_d5.yaml` | 10 | 2 | TwoLayerNet |
-| Octahedral $O_h$ | `config_oh.yaml` | 24 | 2 | TwoLayerNet |
-| Icosahedral $A_5$ | `config_a5.yaml` | 60 | 2 | TwoLayerNet |
+| Product $C_5 \times C_5$ | `config_c5xc5.yaml` | 25 | 2 | TwoLayerMLP |
+| Dihedral $D_3$ | `config_d3.yaml` | 6 | 2 | TwoLayerMLP |
+| Dihedral $D_5$ | `config_d5.yaml` | 10 | 2 | TwoLayerMLP |
+| Octahedral $O_h$ | `config_oh.yaml` | 24 | 2 | TwoLayerMLP |
+| Icosahedral $A_5$ | `config_a5.yaml` | 60 | 2 | TwoLayerMLP |
 
 ### Reproduce Paper's Figure
 
@@ -170,7 +170,7 @@ Key parameters in the YAML config files:
 | `data.group_name` | `cn`, `cnxcn`, `dihedral`, `octahedral`, `A5` | Group to learn |
 | `data.k` | integer | Number of elements to compose |
 | `data.template_type` | `custom_fourier`, `onehot`, `mnist`, `gaussian` | Template generation method |
-| `model.model_type` | `QuadraticRNN`, `SequentialMLP`, `TwoLayerNet` | Architecture |
+| `model.model_type` | `QuadraticRNN`, `SequentialMLP`, `TwoLayerMLP` | Architecture |
 | `model.hidden_dim` | integer | Hidden layer size |
 | `model.init_scale` | float | Weight initialization scale |
 | `training.optimizer` | `auto`, `adam`, `per_neuron`, `hybrid` | Optimizer (`auto` recommended) |
@@ -190,7 +190,7 @@ data:
   powers: [0.0, 3000.0, 2000.0, 1000.0]
 
 model:
-  model_type: TwoLayerNet
+  model_type: TwoLayerMLP
   hidden_dim: 300
   init_scale: 0.0001
 
@@ -209,7 +209,7 @@ training:
 group-agf/
 ├── src/                 # Source code
 │   ├── main.py          # Training entry point (CLI)
-│   ├── model.py         # TwoLayerNet, QuadraticRNN, SequentialMLP
+│   ├── model.py         # TwoLayerMLP, QuadraticRNN, SequentialMLP
 │   ├── optimizer.py     # PerNeuronScaledSGD, HybridRNNOptimizer
 │   ├── dataset.py       # Dataset generation and loading
 │   ├── template.py      # Template construction functions
@@ -239,15 +239,15 @@ group-agf/
 
 | Model | Description | Input |
 |:------|:------------|:------|
-| **TwoLayerNet** | Two-layer feedforward network with configurable nonlinearity (square, relu, tanh, gelu) | Flattened binary pair `(N, 2 * group_size)` |
+| **TwoLayerMLP** | Two-layer feedforward network with configurable nonlinearity (square, relu, tanh, gelu) | Flattened binary pair `(N, 2 * group_size)` |
 | **QuadraticRNN** | Recurrent network: `h_t = (W_mix h_{t-1} + W_drive x_t)^2` | Sequence `(N, k, p)` |
 | **SequentialMLP** | Feedforward MLP with k-th power activation, permutation-invariant for commutative groups | Sequence `(N, k, p)` |
 
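For orientation, here is a minimal, hypothetical sketch of the `TwoLayerMLP` and `QuadraticRNN` computations described in this table. The class names, the zero initial hidden state, and the plain linear readout are illustrative assumptions, not the exact contents of `src/model.py`:

```python
import torch
import torch.nn as nn


class TwoLayerMLPSketch(nn.Module):
    """y = W2 (W1 x)^2 with small init, matching the 'square' nonlinearity."""

    def __init__(self, group_size: int, hidden_dim: int, init_scale: float = 1e-5):
        super().__init__()
        # Input: concatenated encodings of the pair (g1, g2).
        self.W1 = nn.Parameter(init_scale * torch.randn(hidden_dim, 2 * group_size))
        self.W2 = nn.Parameter(init_scale * torch.randn(group_size, hidden_dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:  # x: (N, 2 * group_size)
        return (x @ self.W1.T) ** 2 @ self.W2.T


class QuadraticRNNSketch(nn.Module):
    """Recurrence h_t = (W_mix h_{t-1} + W_drive x_t)^2 over a length-k sequence."""

    def __init__(self, p: int, hidden_dim: int, init_scale: float = 1e-5):
        super().__init__()
        self.W_mix = nn.Parameter(init_scale * torch.randn(hidden_dim, hidden_dim))
        self.W_drive = nn.Parameter(init_scale * torch.randn(hidden_dim, p))
        self.readout = nn.Parameter(init_scale * torch.randn(p, hidden_dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:  # x: (N, k, p)
        h = torch.zeros(x.shape[0], self.W_mix.shape[0], device=x.device)
        for t in range(x.shape[1]):
            h = (h @ self.W_mix.T + x[:, t] @ self.W_drive.T) ** 2
        return h @ self.readout.T  # seq-to-one readout of the final state
```
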
 ### `optimizer.py` -- Custom Optimizers
 
 | Optimizer | Description | Recommended for |
 |:----------|:------------|:----------------|
-| **PerNeuronScaledSGD** | SGD with per-neuron learning rate scaling exploiting model homogeneity | SequentialMLP, TwoLayerNet |
+| **PerNeuronScaledSGD** | SGD with per-neuron learning rate scaling exploiting model homogeneity | SequentialMLP, TwoLayerMLP |
 | **HybridRNNOptimizer** | Scaled SGD for MLP weights + Adam for recurrent weights | QuadraticRNN |
 | Adam (PyTorch built-in) | Standard Adam | QuadraticRNN |
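A rough sketch of the per-neuron scaling idea behind `PerNeuronScaledSGD`. The scaling exponent below is a placeholder assumption; the repository's exact rule lives in `src/optimizer.py`:

```python
import torch


class PerNeuronScaledSGDSketch(torch.optim.Optimizer):
    """SGD whose step for each hidden neuron is rescaled by that neuron's
    weight norm, exploiting the homogeneity of square-activation networks.
    The exponent is a placeholder, not the repository's exact rule."""

    def __init__(self, params, lr: float = 1e-2, degree: int = 3):
        super().__init__(params, dict(lr=lr, degree=degree))

    @torch.no_grad()
    def step(self, closure=None):
        for group in self.param_groups:
            lr, degree = group["lr"], group["degree"]
            for p in group["params"]:
                if p.grad is None or p.dim() == 0:
                    continue
                # One scale per row, treating dim 0 as the neuron index.
                norms = p.reshape(p.shape[0], -1).norm(dim=1).clamp(min=1e-12)
                scale = norms ** (2.0 - 2.0 / degree)  # placeholder exponent
                shape = (-1,) + (1,) * (p.dim() - 1)
                p.add_(p.grad * scale.reshape(shape), alpha=-lr)
```
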
2 changes: 1 addition & 1 deletion notebooks/cn.ipynb
@@ -173,7 +173,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Model: TwoLayerNet(p=11, hidden=200, init_scale=1e-05)\n",
+"Model: TwoLayerMLP(p=11, hidden=200, init_scale=1e-05)\n",
 "Optimizer: PerNeuronScaledSGD(lr=0.01, degree=3)\n",
 "Training for 5000 epochs\n"
 ]
4 changes: 2 additions & 2 deletions notebooks/cnxcn.ipynb
@@ -10,7 +10,7 @@
 "**Group:** Product of cyclic groups $C_n \\times C_n$ of order $n^2$. \n",
 "**Task:** Given encodings of two group elements $g_1, g_2 \\in C_n \\times C_n$, predict the encoding of their product. \n",
 "**Sequence length:** $k = 2$ (binary composition). \n",
-"**Architecture:** `TwoLayerNet` with square nonlinearity. \n",
+"**Architecture:** `TwoLayerMLP` with square nonlinearity. \n",
 "**Key result:** The network learns one irreducible representation at a time."
 ]
 },
@@ -176,7 +176,7 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Model: TwoLayerNet(group_size=25, hidden=128)\n",
+"Model: TwoLayerMLP(group_size=25, hidden=128)\n",
 "Optimizer: Adam(lr=0.001)\n",
 "Training for 1000 epochs\n"
 ]
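The notebook header above leaves the encoding implicit; a hypothetical sketch of how one binary-composition sample could be built from a template (the `make_pair_sample` helper and the `rho(g) @ template` encoding are assumptions; the repository's construction lives in `src/template.py` and `src/dataset.py`):

```python
import numpy as np


def make_pair_sample(rep_matrices, template, g1, g2, mul_table):
    """Hypothetical sample builder: encode element g as rho(g) @ template,
    where rep_matrices[g] is the regular-representation matrix of g.

    Input:  concatenated encodings of (g1, g2), shape (2 * group_size,).
    Target: encoding of the product g1 * g2, shape (group_size,).
    """
    x = np.concatenate([rep_matrices[g1] @ template, rep_matrices[g2] @ template])
    y = rep_matrices[mul_table[g1, g2]] @ template
    return x, y
```
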
228 changes: 59 additions & 169 deletions notebooks/dn.ipynb

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions pyproject.toml
@@ -28,7 +28,6 @@ scikit-learn = "^1.7.2"
 tqdm = "^4.67.1"
 seaborn = "^0.13.2"
 jupyter-black = "^0.4.0"
-escnn = "^1.0.11"
 
 
 [build-system]
@@ -60,7 +59,7 @@ ignore = [
     "E501", # line too long (handled by formatter)
     "B008", # do not perform function calls in argument defaults
     "B905", # zip without strict
-    "F403", # star imports (used intentionally for escnn.group)
+    "F403", # star imports
     "F405", # may be undefined from star imports
     "F841", # local variable assigned but never used (common in notebooks/experiments)
     "B007", # loop control variable not used (common pattern)
2 changes: 1 addition & 1 deletion runs_data/A5/config.yaml
@@ -20,7 +20,7 @@ device: cuda:0
 model:
   hidden_dim: 400
   init_scale: 1.0e-05
-  model_type: TwoLayerNet
+  model_type: TwoLayerMLP
   nonlinearity: square
   output_scale: 1.0
   return_all_outputs: false
5 changes: 2 additions & 3 deletions runs_data/C11/config.yaml
@@ -22,18 +22,17 @@ device: cuda:0
 model:
   hidden_dim: 200
   init_scale: 1.0e-05
-  model_type: TwoLayerNet
+  model_type: TwoLayerMLP
   nonlinearity: square
   output_scale: 1.0
   return_all_outputs: false
   transform_type: quadratic
 training:
   betas:
   - 0.9
   - 0.999
-  degree: null
   dense_save_until: 10
-  epochs: 100000
+  epochs: 5000
   grad_clip: 0.1
   learning_rate: 0.01
   mode: offline
Binary file modified runs_data/C11/param_save_indices.npy
Binary file modified runs_data/C11/power_data.npz
Binary file modified runs_data/C11/train_loss_history.npy
Binary file modified runs_data/C11/w_dominant_irrep_fraction.npz
2 changes: 1 addition & 1 deletion runs_data/C5xC5/config.yaml
@@ -23,7 +23,7 @@ device: cuda:0
 model:
   hidden_dim: 300
   init_scale: 1.0e-05
-  model_type: TwoLayerNet
+  model_type: TwoLayerMLP
   nonlinearity: square
   output_scale: 1.0
   return_all_outputs: false
2 changes: 1 addition & 1 deletion runs_data/D5/config.yaml
@@ -25,7 +25,7 @@ device: cuda:1
 model:
   hidden_dim: 200
   init_scale: 1.0e-06
-  model_type: TwoLayerNet
+  model_type: TwoLayerMLP
   nonlinearity: square
   output_scale: 1.0
   return_all_outputs: false
5 changes: 2 additions & 3 deletions runs_data/Oh/config.yaml
@@ -20,18 +20,17 @@ device: cuda:0
 model:
   hidden_dim: 300
   init_scale: 1.0e-05
-  model_type: TwoLayerNet
+  model_type: TwoLayerMLP
   nonlinearity: square
   output_scale: 1.0
   return_all_outputs: false
   transform_type: quadratic
 training:
   betas:
   - 0.9
   - 0.999
-  degree: null
   dense_save_until: 10
-  epochs: 1000000
+  epochs: 1000
   grad_clip: 0.1
   learning_rate: 0.001
   mode: offline
Binary file modified runs_data/Oh/param_save_indices.npy
Binary file modified runs_data/Oh/power_data.npz
Binary file modified runs_data/Oh/train_loss_history.npy
Binary file modified runs_data/Oh/w_dominant_irrep_fraction.npz
58 changes: 58 additions & 0 deletions scripts/extract_escnn_data.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+"""One-time script: extract irrep and regular-rep matrices from escnn.
+
+Saves .npy files into src/groups/data/ that are loaded at import time
+by oh.py and a5.py.
+
+Usage:
+    conda activate group-agf
+    python scripts/extract_escnn_data.py
+"""
+
+from pathlib import Path
+
+import numpy as np
+from escnn.group import Icosahedral, Octahedral
+
+DATA_DIR = Path(__file__).resolve().parent.parent / "src" / "groups" / "data"
+
+
+def extract_group(escnn_group, prefix: str):
+    """Extract and save all group data as .npy files."""
+    DATA_DIR.mkdir(parents=True, exist_ok=True)
+
+    elements = list(escnn_group.elements)
+    irreps = escnn_group.irreps()
+
+    print(f"  Order: {escnn_group.order()}")
+    print(f"  Irreps: {len(irreps)} dims={[ir.size for ir in irreps]}")
+    print(f"  Irrep names: {[str(ir) for ir in irreps]}")
+
+    names = [str(ir) for ir in irreps]
+    np.save(DATA_DIR / f"{prefix}_irrep_names.npy", np.array(names))
+
+    for i, irrep in enumerate(irreps):
+        mats = np.array([irrep(g) for g in elements])
+        path = DATA_DIR / f"{prefix}_irrep_{i}.npy"
+        np.save(path, mats)
+        print(f"  Saved {path.name} shape={mats.shape}")
+
+    regular_rep = escnn_group.representations["regular"]
+    reg_mats = np.array([regular_rep(g) for g in elements])
+    path = DATA_DIR / f"{prefix}_regular_rep.npy"
+    np.save(path, reg_mats)
+    print(f"  Saved {path.name} shape={reg_mats.shape}")
+
+
+def main():
+    print("Extracting Octahedral ...")
+    extract_group(Octahedral(), "oh")
+
+    print("\nExtracting Icosahedral (A5) ...")
+    extract_group(Icosahedral(), "a5")
+
+    print(f"\nAll files saved to {DATA_DIR}")
+
+
+if __name__ == "__main__":
+    main()
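For context, a hypothetical consumer-side loader matching the file layout written above (`load_regular_rep` and `load_irrep` are illustrative names; the real loading happens at import time in `oh.py` and `a5.py`, which may differ):

```python
import numpy as np
from pathlib import Path

# Mirrors DATA_DIR in the extraction script: src/groups/data/
DATA_DIR = Path(__file__).resolve().parent / "data"


def load_regular_rep(prefix: str) -> np.ndarray:
    """Load stacked regular-rep matrices, shape (order, order, order)."""
    return np.load(DATA_DIR / f"{prefix}_regular_rep.npy")


def load_irrep(prefix: str, i: int) -> np.ndarray:
    """Load the i-th irrep's matrices, shape (order, d_i, d_i)."""
    return np.load(DATA_DIR / f"{prefix}_irrep_{i}.npy")
```
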
4 changes: 2 additions & 2 deletions src/configs/config_a5.yaml
@@ -21,8 +21,8 @@ model:
   model_type: TwoLayerMLP # Model architecture: 'QuadraticRNN' | 'TwoLayerMLP'
   hidden_dim: 400 # Hidden layer width
   init_scale: 0.00001 # Weight initialization scale
-  nonlinearity: square # Activation function (TwoLayerNet only): 'square' | ...
-  output_scale: 1.0 # Output layer scaling factor (TwoLayerNet only)
+  nonlinearity: square # Activation function (TwoLayerMLP only): 'square' | ...
+  output_scale: 1.0 # Output layer scaling factor (TwoLayerMLP only)
   return_all_outputs: false # true = seq-to-seq, false = seq-to-one (final output only)
 
 training:
6 changes: 3 additions & 3 deletions src/configs/config_c11.yaml
@@ -17,11 +17,11 @@ data:
   powers: [0, 12.5, 10, 7.5, 5, 2.5] #[0.0, 200.0, 100.0, 50.0, 20.0, 5.]
 
 model:
-  model_type: TwoLayerNet # Model architecture: 'QuadraticRNN' | 'SequentialMLP' | 'TwoLayerNet'
+  model_type: TwoLayerMLP # Model architecture: 'QuadraticRNN' | 'SequentialMLP' | 'TwoLayerMLP'
   hidden_dim: 200 # Hidden layer width
   init_scale: 0.00001 # Weight initialization scale
-  nonlinearity: square # Activation function (TwoLayerNet only): 'square' | ...
-  output_scale: 1.0 # Output layer scaling factor (TwoLayerNet only)
+  nonlinearity: square # Activation function (TwoLayerMLP only): 'square' | ...
+  output_scale: 1.0 # Output layer scaling factor (TwoLayerMLP only)
   return_all_outputs: false # true = seq-to-seq, false = seq-to-one (final output only)
 
 training:
4 changes: 2 additions & 2 deletions src/configs/config_c5xc5.yaml
@@ -20,8 +20,8 @@ model:
   model_type: TwoLayerMLP # Model architecture: 'QuadraticRNN' | 'TwoLayerMLP'
   hidden_dim: 300 # Hidden layer width (hidden_factor=20 * group_size=25)
   init_scale: 0.00001 # Weight initialization scale
-  nonlinearity: square # Activation function (TwoLayerNet only): 'square' | ...
-  output_scale: 1.0 # Output layer scaling factor (TwoLayerNet only)
+  nonlinearity: square # Activation function (TwoLayerMLP only): 'square' | ...
+  output_scale: 1.0 # Output layer scaling factor (TwoLayerMLP only)
   return_all_outputs: false # true = seq-to-seq, false = seq-to-one (final output only)
 
 training:
4 changes: 2 additions & 2 deletions src/configs/config_d5.yaml
@@ -22,8 +22,8 @@ model:
   model_type: TwoLayerMLP # Model architecture: 'QuadraticRNN' | 'TwoLayerMLP'
   hidden_dim: 200 # Hidden layer width (hidden_factor=20 * group_size=10)
   init_scale: 0.000001 # Weight initialization scale
-  nonlinearity: square # Activation function (TwoLayerNet only): 'square' | ...
-  output_scale: 1.0 # Output layer scaling factor (TwoLayerNet only)
+  nonlinearity: square # Activation function (TwoLayerMLP only): 'square' | ...
+  output_scale: 1.0 # Output layer scaling factor (TwoLayerMLP only)
   return_all_outputs: false # true = seq-to-seq, false = seq-to-one (final output only)
 
 training:
4 changes: 2 additions & 2 deletions src/configs/config_oh.yaml
@@ -21,8 +21,8 @@ model:
   model_type: TwoLayerMLP # Model architecture: 'QuadraticRNN' | 'TwoLayerMLP'
   hidden_dim: 300 # Hidden layer width
   init_scale: 0.00001 # Weight initialization scale
-  nonlinearity: square # Activation function (TwoLayerNet only): 'square' | ...
-  output_scale: 1.0 # Output layer scaling factor (TwoLayerNet only)
+  nonlinearity: square # Activation function (TwoLayerMLP only): 'square' | ...
+  output_scale: 1.0 # Output layer scaling factor (TwoLayerMLP only)
   return_all_outputs: false # true = seq-to-seq, false = seq-to-one (final output only)
 
 training:
11 changes: 4 additions & 7 deletions src/dataset.py
@@ -177,18 +177,15 @@ def _build_group(
     num_samples=65536,
     return_all_outputs=False,
 ):
-    """Build dataset arrays for any escnn group with a regular representation."""
-    group_size = group.order()
+    """Build dataset arrays for any group with a regular representation."""
+    group_size = group.order
 
     assert template.shape == (group_size,), (
         f"template must be ({group_size},), got {template.shape}"
    )
 
-    regular_rep = group.representations["regular"]
-    elements = list(group.elements)
-    n_elements = len(elements)
-
-    rep_matrices = np.array([regular_rep(g) for g in elements])
+    n_elements = group_size
+    rep_matrices = group.regular_rep()
 
     if mode == "exhaustive":
         total = n_elements**k
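After this change, `_build_group` only assumes a lightweight group object exposing an `order` attribute and a `regular_rep()` method returning all representation matrices at once. A minimal sketch of such an object for the cyclic group $C_n$ (the class name is hypothetical; the repository's group classes presumably live under `src/groups/`):

```python
import numpy as np


class CyclicGroupSketch:
    """Hypothetical minimal group object satisfying the interface that
    _build_group relies on after this refactor."""

    def __init__(self, n: int):
        self.order = n  # number of group elements (attribute, not a method)

    def regular_rep(self) -> np.ndarray:
        # Regular representation of C_n: all n cyclic-shift permutation
        # matrices, stacked into shape (order, order, order).
        eye = np.eye(self.order)
        return np.stack([np.roll(eye, g, axis=0) for g in range(self.order)])
```

With this interface, `group_size = group.order` and `rep_matrices = group.regular_rep()` behave exactly as the new code expects, and no escnn import is needed at dataset-build time.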
63 changes: 0 additions & 63 deletions src/fourier.py

This file was deleted.
