
Commit dad0964 (parent: 274a429)

Authored by yang-chengg and Copilot

add feature registry app code (#548)

add v1 feature registry app code

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

35 files changed: +1829 / -6 lines

feature-registry-app/.gitignore

Lines changed: 11 additions & 0 deletions

```
# Databricks
.databricks/
.databricks.sync-snapshots

# Python
__pycache__/
.pytest_cache/
.coverage

# Environment & Config
deploy_config.sh
```

feature-registry-app/README.md

Lines changed: 59 additions & 6 deletions

````diff
@@ -7,13 +7,14 @@ date: 2025-08-05
 
 # 🚀 Feature Registry Application
 
-This application provides a modern interface for discovering and managing features with seamless integration to Unity Catalog.
+This is a modern web application that allows users to interact with the Databricks Feature Registry. The app provides a user-friendly interface for exploring existing features in Unity Catalog. Additionally, users can generate code for creating feature specs and training sets to train machine learning models and deploy features as Feature Serving Endpoints.
 
 ## ✨ Features
 
-- 🔍 List and search for features
+- 🔍 List and search for features in Unity Catalog
 - 🔒 On-behalf-of-user authentication
 - ⚙️ Code-gen for creating feature specs and training sets
+- 📋 Configurable catalog allow-listing for access control
 
 ## 🏗️ Architecture
 
@@ -25,11 +26,63 @@ The application is built with:
 
 ![Feature Registry Interface](./images/feature-registry-interface.png)
 
+## 🚀 Deployment
+
+### Create an App
+1. Log into your destination Databricks workspace and navigate to "Compute > Apps"
+2. Click on "Create App" and select "Create a custom app"
+3. Enter an app name and click "Create app"
+
+### Customization
+1. Create a file named `deploy_config.sh` in the root folder with the following variables:
+```sh
+# Path to a destination folder in the default Databricks workspace where source code will be synced
+export DEST=/Workspace/Users/Path/To/App/Code
+# Name of the App to deploy
+export APP_NAME=your-app-name
+```
+Or simply run `./deploy.sh`; it will create a template file if one doesn't exist.
+
+2. Update `deploy_config.sh` with the config for your environment
+
+3. Ensure the Databricks CLI is installed and configured on your machine. The "DEFAULT" profile should point to the destination workspace where the app will be deployed. You can find instructions here for [AWS](https://docs.databricks.com/dev-tools/cli/index.html) / [Azure](https://learn.microsoft.com/en-us/azure/databricks/dev-tools/cli/)
+
+### Deploy the App
+1. Navigate to the app directory
+2. Run the `./deploy.sh` shell command. This will sync the app code to the destination workspace location and deploy the app
+3. Navigate to the Databricks workspace and access the app via "Compute > Apps"
+
+## 🔐 Access Control
+
+### Catalog Allow-Listing
+
+By default, the Feature Registry App shows all the catalogs to which the user has read access. You can restrict which Unity Catalog catalogs users can explore for features. This is useful for:
+- Limiting feature discovery to production-ready catalogs
+- Ensuring data scientists only access approved feature sets
+- Organizing features by teams or projects
+
+#### Setting Up Allow-Listed Catalogs
+
+1. Edit the `src/uc_catalogs_allowlist.yaml` file
+2. Uncomment and add the catalog names you want to allow:
+
+```yaml
+# List catalogs that should be accessible in the Feature Registry App
+- production_features
+- team_a_catalog
+- ml_features_catalog
+```
+
+3. If the file is empty or all entries are commented out, the app will show all catalogs available to the user
+4. Deploy the app with the updated configuration
+
+**Note:** Users will still need appropriate permissions in Unity Catalog to access the data within these catalogs. The allow-list acts as an additional filter on top of existing permissions.
+
 ## 🔑 Requirements
 
 The application requires the following scopes:
-- `catalog.catalogs`
-- `catalog.schemas`
-- `catalog.tables`
+- `catalog.catalogs:read`
+- `catalog.schemas:read`
+- `catalog.tables:read`
 
-The app owner needs to grant other users `Can Use` permission for the app itself, along with the access to the underlying Datarbricks resources.
+The app owner needs to grant other users `Can Use` permission for the app itself, along with access to the underlying Databricks resources.
````
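The allow-list rule described above (an empty or fully commented-out file shows every catalog the user can read; otherwise the list acts as an extra filter on top of Unity Catalog permissions) can be sketched in plain Python. The function names here are hypothetical illustrations, not part of the app's actual source, and a real implementation would likely use a YAML library:

```python
def parse_allowlist(text: str) -> list[str]:
    # Hypothetical parser for the simple "- catalog_name" list format used by
    # uc_catalogs_allowlist.yaml; comment lines (starting with "#") and blank
    # lines are ignored.
    entries: list[str] = []
    for line in text.splitlines():
        line = line.strip()
        if line.startswith("- "):
            entries.append(line[2:].strip())
    return entries


def filter_catalogs(catalogs: list[str], allowlist: list[str]) -> list[str]:
    # An empty (or fully commented-out) allowlist means "show every catalog
    # the user can already read"; otherwise keep only allow-listed names.
    if not allowlist:
        return catalogs
    return [c for c in catalogs if c in allowlist]
```

With the sample file above, a catalog named `scratch` would be hidden even if the user has read access to it.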

feature-registry-app/deploy.sh

Lines changed: 22 additions & 0 deletions

```sh
echo_red() {
  echo "\033[1;31m$*\033[0m"
}

# Validate the current folder
[[ -d "./src" && -f "./src/app.yaml" ]] || { echo_red "Error: Couldn't find app.yaml. \nPlease run this script from the //sandbox/feature-registry-app directory."; exit 1; }

# Users: Make sure you have a ./deploy_config.sh file that sets the necessary variables for this script.
[ -f "./deploy_config.sh" ] || {
  cat <<EOF > deploy_config.sh
# Path to a folder in the workspace. E.g. /Workspace/Users/Path/To/App/Code
export DEST=""
# Name of the App to deploy. E.g. your-app-name
export APP_NAME=""
EOF
  echo_red "Please update deploy_config.sh and run again."
  exit 1;
}
source ./deploy_config.sh

databricks sync --full ./src $DEST
databricks apps deploy $APP_NAME --source-code-path $DEST
```

feature-registry-app/pytest.ini

Lines changed: 24 additions & 0 deletions

```ini
[pytest]
testpaths = tests
python_files = test_*.py
python_classes = Test*
python_functions = test_*

# Add src directory to Python path
pythonpath = src

# Coverage settings
[coverage:run]
source = src
omit =
    */__pycache__/*
    */tests/*

[coverage:report]
exclude_lines =
    pragma: no cover
    def __repr__
    raise NotImplementedError
    if __name__ == .__main__.:
    pass
    raise ImportError
```
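As a quick illustration of the discovery rules above, a hypothetical file such as `tests/test_example.py` would be collected because its name, class, and function all match `python_files`, `python_classes`, and `python_functions`:

```python
# tests/test_example.py (hypothetical) -- discovered because the filename
# matches python_files = test_*.py under testpaths = tests.
def add(a: int, b: int) -> int:
    return a + b


class TestAdd:  # collected: matches python_classes = Test*
    def test_add_ints(self):  # collected: matches python_functions = test_*
        assert add(2, 3) == 5
```

The `pythonpath = src` setting is what lets such tests import the app modules directly without installing the package.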

feature-registry-app/src/app.yaml

Lines changed: 11 additions & 0 deletions

```yaml
command: [
  "streamlit",
  "run",
  "feature_registry.py"
]

env:
  - name: STREAMLIT_BROWSER_GATHER_USAGE_STATS
    value: "false"
  # Path of the yaml file that contains the allow-listed UC catalogs. The
  # Feature Registry App restricts the search of features to this list.
  - name: UC_CATALOGS_ALLOWLIST
    value: "uc_catalogs_allowlist.yaml"
```
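Inside the app, the allowlist path configured in `app.yaml` would typically be read back from the environment. A minimal sketch, assuming the app falls back to the default file name when the variable is unset (the helper name is hypothetical):

```python
import os


def allowlist_path(default: str = "uc_catalogs_allowlist.yaml") -> str:
    # app.yaml injects UC_CATALOGS_ALLOWLIST into the app's environment;
    # fall back to the default file name if the variable is not set.
    return os.environ.get("UC_CATALOGS_ALLOWLIST", default)
```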
Lines changed: 24 additions & 0 deletions

```python
from databricks.sdk import WorkspaceClient


class UcClient:
    def __init__(self, user_access_token: str):
        self.w = WorkspaceClient(token=user_access_token, auth_type="pat")

    def get_catalogs(self):
        return self.w.catalogs.list(include_browse=False)

    def get_schemas(self, catalog_name: str):
        return self.w.schemas.list(catalog_name=catalog_name)

    def get_tables(self, catalog_name: str, schema_name: str):
        return self.w.tables.list(catalog_name=catalog_name, schema_name=schema_name)

    def get_table(self, full_name: str):
        return self.w.tables.get(full_name=full_name)

    def get_functions(self, catalog_name: str, schema_name: str):
        return self.w.functions.list(catalog_name=catalog_name, schema_name=schema_name)

    def get_function(self, full_name: str):
        return self.w.functions.get(name=full_name)
```

feature-registry-app/src/entities/__init__.py

Whitespace-only changes.
Lines changed: 72 additions & 0 deletions

```python
from typing import Any, Dict, List, Optional, Tuple

from pydantic import BaseModel

from .tables import Table


class MaterializedInfo(BaseModel):
    schema_name: str
    table_name: str
    primary_keys: List[str]
    timeseries_columns: List[str]


class Feature:
    def __init__(
        self, name: str, table: Table, pks: List[str], ts: Optional[List[str]] = None
    ):
        self.name = name
        self.table = table
        self.pks = pks
        self.ts = ts or []

    def get_materialized_info(self) -> MaterializedInfo:
        return MaterializedInfo(
            schema_name=self.table.schema(),
            table_name=self.table.name(),
            primary_keys=self.pks or [],
            timeseries_columns=self.ts or [],
        )

    def description(self) -> str:
        for column in self.table.uc_table.columns:
            if column.name == self.name:
                return column.comment
        return ""

    def components(self) -> Tuple[str, str, str]:
        return self.name, self.table.full_name(), ", ".join(self.pks)

    def metadata(self) -> Dict[str, Any]:
        return {
            "Table Name": self.table.full_name(),
            "Primary Keys": self.pks,
            "Timeseries Columns": self.ts,
            "# of Features": len(self.table.uc_table.columns) - len(self.pks),
            "Table Type": self.table.uc_table.table_type.name,
        }

    def inputs(self) -> Dict[str, str] | None:
        return None

    def outputs(self) -> Dict[str, str] | None:
        return None

    def code(self) -> str:
        return self.table.uc_table.view_definition

    def table_name(self) -> str:
        return self.table.full_name()

    def full_name(self) -> str:
        return f"{self.table.full_name()}.{self.name}"


class SelectableFeature:
    def __init__(self, feature: Feature, selected: bool = False):
        self.feature = feature
        self.selected = selected

    def components(self) -> Tuple[bool, str, str, str]:
        return (self.selected,) + self.feature.components()
```
Lines changed: 30 additions & 0 deletions

```python
from typing import Any, Dict, Tuple

from databricks import sdk
from pydantic import BaseModel


class FeatureFunction(BaseModel):
    function: sdk.service.catalog.FunctionInfo

    def full_name(self) -> str:
        return self.function.full_name

    def components(self) -> Tuple[str, str, Any, Any]:
        return self.full_name(), "feature spec", None, None

    def metadata(self) -> Dict[str, Any] | None:
        return None

    def inputs(self) -> Dict[str, str] | None:
        if self.function.input_params and self.function.input_params.parameters:
            return {p.name: p.type_text for p in self.function.input_params.parameters}
        return None

    def outputs(self) -> Dict[str, str] | None:
        if self.function.return_params and self.function.return_params.parameters:
            return {p.name: p.type_text for p in self.function.return_params.parameters}
        return None

    def code(self) -> str:
        return self.function.routine_definition
```
Lines changed: 20 additions & 0 deletions

```python
from typing import Tuple

from databricks import sdk


class Table:
    def __init__(self, uc_table: sdk.service.catalog.TableInfo):
        self.uc_table = uc_table

    def full_name(self) -> str:
        return self.uc_table.full_name

    def name(self) -> str:
        return self.uc_table.name

    def schema(self) -> str:
        return self.uc_table.schema_name

    def components(self) -> Tuple[str, str, str]:
        return self.uc_table.catalog_name, self.uc_table.schema_name, self.uc_table.name
```
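To see how the entity classes fit together, a stub standing in for the SDK's `TableInfo` (hypothetical, for illustration only) shows the three-level `catalog.schema.table` naming that `Table` exposes and that `Feature.full_name()` extends with the feature column name:

```python
from dataclasses import dataclass


@dataclass
class StubTableInfo:
    # Minimal stand-in for sdk.service.catalog.TableInfo; only the fields
    # that Table actually reads are modeled here.
    catalog_name: str
    schema_name: str
    name: str

    @property
    def full_name(self) -> str:
        return f"{self.catalog_name}.{self.schema_name}.{self.name}"


info = StubTableInfo("prod", "ml", "user_features")
# Table.components() would return the three name parts...
parts = (info.catalog_name, info.schema_name, info.name)
# ...and Feature.full_name() appends the feature column to the table's full name.
feature_full_name = f"{info.full_name}.age"
```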
