subhadarship
diff --git a/‎LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎MANIFEST.in‎
Lines changed: 3 additions & 0 deletions b/‎MANIFEST.in‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎build/lib/kmeans_pytorch/__init__.py‎
Lines changed: 135 additions & 0 deletions b/‎build/lib/kmeans_pytorch/__init__.py‎
Lines changed: 135 additions & 0 deletions
diff --git a/‎build/lib/kmeans_pytorch/main.py‎
Lines changed: 12 additions & 0 deletions b/‎build/lib/kmeans_pytorch/main.py‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎dist/kmeans_pytorch-0.2-py3-none-any.whl‎
4.09 KB b/‎dist/kmeans_pytorch-0.2-py3-none-any.whl‎
4.09 KB
diff --git a/‎dist/kmeans_pytorch-0.2.tar.gz‎
3.98 KB b/‎dist/kmeans_pytorch-0.2.tar.gz‎
3.98 KB
diff --git a/‎kmeans_pytorch.egg-info/PKG-INFO‎
Lines changed: 60 additions & 0 deletions b/‎kmeans_pytorch.egg-info/PKG-INFO‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎kmeans_pytorch.egg-info/SOURCES.txt‎
Lines changed: 12 additions & 0 deletions b/‎kmeans_pytorch.egg-info/SOURCES.txt‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎kmeans_pytorch.egg-info/dependency_links.txt‎
Lines changed: 1 addition & 0 deletions b/‎kmeans_pytorch.egg-info/dependency_links.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎kmeans_pytorch.egg-info/entry_points.txt‎
Lines changed: 3 additions & 0 deletions b/‎kmeans_pytorch.egg-info/entry_points.txt‎
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) Subhadarshi Panda
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,3 @@
+# Things to include in the built package (besides the packages defined in setup.py)
+include README.md
+include LICENSE
@@ -0,0 +1,135 @@
+import numpy as np
+import torch
+from tqdm import tqdm
+
+
def initialize(X, num_clusters):
    """Pick `num_clusters` distinct rows of `X` as the starting cluster centers.

    Row indices are drawn without replacement from NumPy's global random
    state, so seeding `np.random` makes the selection reproducible.

    Args:
        X: samples, indexed along the first dimension.
        num_clusters: how many rows to draw.

    Returns:
        `X[indices]`, the selected rows.

    Raises:
        ValueError: if `num_clusters` is not between 1 and `len(X)`.
    """
    num_samples = len(X)
    # Fail early with a readable message: zero clusters would otherwise yield
    # an empty center set, and too many cannot be drawn without replacement.
    if not 1 <= num_clusters <= num_samples:
        raise ValueError(
            f'num_clusters must be between 1 and the number of samples '
            f'({num_samples}), got {num_clusters}'
        )
    indices = np.random.choice(num_samples, num_clusters, replace=False)
    return X[indices]
+
+
def kmeans(
        X,
        num_clusters,
        distance='euclidean',
        tol=1e-4,
        device=torch.device('cpu')
):
    """Cluster the rows of `X` into `num_clusters` groups with k-means.

    Each iteration assigns every sample to its nearest center, then moves
    each center to the mean of the samples assigned to it. Iteration stops
    once the squared total center movement drops below `tol`.

    Args:
        X: tensor of samples, one per row; converted to float and moved to
            `device`.
        num_clusters: number of clusters to form.
        distance: 'euclidean' or 'cosine'.
        tol: threshold on the squared sum of per-center Euclidean shifts.
        device: torch device on which the computation runs.

    Returns:
        A tuple `(cluster_ids, cluster_centers)`, both moved to the CPU.

    Raises:
        NotImplementedError: if `distance` is not a supported name.
    """
    print(f'running k-means on {device}..')

    if distance == 'euclidean':
        pairwise_distance_function = pairwise_distance
    elif distance == 'cosine':
        pairwise_distance_function = pairwise_cosine
    else:
        raise NotImplementedError(f'unsupported distance: {distance!r}')

    # convert to float and transfer to device
    X = X.float().to(device)

    # initialize
    initial_state = initialize(X, num_clusters)

    iteration = 0
    # the context manager closes the progress meter even if an error occurs
    with tqdm(desc='[running kmeans]') as tqdm_meter:
        while True:
            # Pass the device explicitly: the distance helpers default to the
            # CPU and would otherwise move the data off `device` every pass.
            dis = pairwise_distance_function(X, initial_state, device=device)
            # The distance helpers may drop singleton dimensions; restore the
            # (samples, clusters) layout so argmin over dim=1 is always valid.
            dis = dis.reshape(X.shape[0], -1)

            choice_cluster = torch.argmin(dis, dim=1)

            initial_state_pre = initial_state.clone()

            for index in range(num_clusters):
                members = X[choice_cluster == index]
                # The mean of zero rows is NaN, and a NaN shift never passes
                # the `< tol` test below, so an empty cluster keeps its
                # previous center instead.
                if members.shape[0] > 0:
                    initial_state[index] = members.mean(dim=0)

            # sum over centers of the Euclidean distance each one moved
            center_shift = torch.sum(
                torch.sqrt(
                    torch.sum((initial_state - initial_state_pre) ** 2, dim=1)
                ))

            # increment iteration
            iteration = iteration + 1

            # update tqdm meter
            tqdm_meter.set_postfix(
                iteration=f'{iteration}',
                center_shift=f'{center_shift ** 2:0.6f}',
                tol=f'{tol:0.6f}'
            )
            tqdm_meter.update()
            if center_shift ** 2 < tol:
                break

    return choice_cluster.cpu(), initial_state.cpu()
+
+
def kmeans_predict(
        X,
        cluster_centers,
        distance='euclidean',
        device=torch.device('cpu')
):
    """Assign each row of `X` to its nearest cluster center.

    Args:
        X: tensor of samples, one per row; converted to float and moved to
            `device`.
        cluster_centers: tensor of centers, one per row.
        distance: 'euclidean' or 'cosine'.
        device: torch device on which the distances are computed.

    Returns:
        A CPU tensor holding, for every sample, the index of the closest
        center.

    Raises:
        NotImplementedError: if `distance` is not a supported name.
    """
    print(f'predicting on {device}..')

    if distance == 'euclidean':
        pairwise_distance_function = pairwise_distance
    elif distance == 'cosine':
        pairwise_distance_function = pairwise_cosine
    else:
        raise NotImplementedError(f'unsupported distance: {distance!r}')

    # convert to float and transfer to device
    X = X.float().to(device)

    # Pass the device explicitly: the distance helpers default to the CPU and
    # would otherwise ignore the requested device.
    dis = pairwise_distance_function(X, cluster_centers, device=device)
    # The distance helpers may drop singleton dimensions (one sample or one
    # center); restore the (samples, centers) layout before taking argmin.
    dis = dis.reshape(X.shape[0], -1)
    choice_cluster = torch.argmin(dis, dim=1)

    return choice_cluster.cpu()
+
+
def pairwise_distance(data1, data2, device=torch.device('cpu')):
    """Return squared Euclidean distances between rows of two tensors.

    Args:
        data1: tensor with N rows of M features.
        data2: tensor with K rows of M features.
        device: torch device both inputs are moved to before computing.

    Returns:
        An N*K tensor whose entry (i, j) is the sum of squared differences
        between row i of `data1` and row j of `data2` (no square root taken).
    """
    # transfer to device
    data1, data2 = data1.to(device), data2.to(device)

    # N*1*M
    A = data1.unsqueeze(dim=1)

    # 1*K*M
    B = data2.unsqueeze(dim=0)

    # Broadcasting gives N*K*M; summing the feature axis leaves N*K. No
    # squeeze here: it would collapse N*1 or 1*K results to 1-D and break
    # callers that reduce over dim=1.
    return ((A - B) ** 2.0).sum(dim=-1)
+
+
def pairwise_cosine(data1, data2, device=torch.device('cpu')):
    """Return cosine distances (1 - cosine similarity) between rows.

    Args:
        data1: tensor with N rows of M features.
        data2: tensor with K rows of M features.
        device: torch device both inputs are moved to before computing.

    Returns:
        An N*K tensor whose entry (i, j) is one minus the cosine similarity
        of row i of `data1` and row j of `data2`. A row of all zeros has
        similarity 0 to everything, i.e. distance 1.
    """
    # transfer to device
    data1, data2 = data1.to(device), data2.to(device)

    # N*1*M
    A = data1.unsqueeze(dim=1)

    # 1*K*M
    B = data2.unsqueeze(dim=0)

    # normalize the points  | [0.3, 0.4] -> [0.3/0.5, 0.4/0.5]
    # The norm is clamped away from zero so an all-zero row yields zeros
    # rather than NaN from a 0/0 division.
    eps = 1e-8
    A_normalized = A / A.norm(dim=-1, keepdim=True).clamp(min=eps)
    B_normalized = B / B.norm(dim=-1, keepdim=True).clamp(min=eps)

    cosine = A_normalized * B_normalized

    # Summing the feature axis leaves N*K. No squeeze here: it would collapse
    # N*1 or 1*K results to 1-D and break callers that reduce over dim=1.
    return 1 - cosine.sum(dim=-1)
@@ -0,0 +1,12 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
__version__ = "0.2"


def main() -> None:
    """Console-script entry point; for now it only prints a placeholder."""
    print("TODO")


if __name__ == "__main__":
    main()
@@ -0,0 +1,60 @@
+Metadata-Version: 2.1
+Name: kmeans-pytorch
+Version: 0.2
+Summary: UNKNOWN
+Home-page: https://github.com/subhadarship/kmeans_pytorch
+Author: Subhadarshi Panda
+Author-email: subhadarshipanda08@gmail.com
+License: License :: OSI Approved :: MIT License
+Description: # K Means using PyTorch
+        PyTorch implementation of k-means that can utilize the GPU
+        
+        # Getting Started
+        ```
+        
+        import torch
+        import numpy as np
+        from kmeans_pytorch import kmeans
+        
+        # data
+        data_size, dims, num_clusters = 1000, 2, 3
+        x = np.random.randn(data_size, dims) / 6
+        x = torch.from_numpy(x)
+        
+        # kmeans
+        cluster_ids_x, cluster_centers = kmeans(
+            X=x, num_clusters=num_clusters, distance='euclidean', device=torch.device('cuda:0')
+        )
+        ```
+        
+        see [`example.ipynb`](https://github.com/subhadarship/kmeans_pytorch/blob/master/example.ipynb) for a more elaborate example
+        
+        # Requirements
+        * [PyTorch](http://pytorch.org/) version >= 1.0.0
+        * Python version >= 3.6
+        
+        # Installation
+        
+        install with `pip`:
+        ```
+        pip install kmeans-pytorch
+        ```
+        
+        **Installing from source**
+        
+        To install from source and develop locally:
+        ```
+        git clone https://github.com/subhadarship/kmeans_pytorch
+        cd kmeans_pytorch
+        pip install --editable .
+        ```
+        
+        # Notes
+        - useful when clustering a large number of samples
+        - utilizes GPU for faster matrix computations
+        - supports Euclidean and cosine distances (for now)
+        
+Platform: UNKNOWN
+Classifier: Programming Language :: Python
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
@@ -0,0 +1,12 @@
+LICENSE
+MANIFEST.in
+README.md
+setup.py
+kmeans_pytorch/__init__.py
+kmeans_pytorch/main.py
+kmeans_pytorch.egg-info/PKG-INFO
+kmeans_pytorch.egg-info/SOURCES.txt
+kmeans_pytorch.egg-info/dependency_links.txt
+kmeans_pytorch.egg-info/entry_points.txt
+kmeans_pytorch.egg-info/not-zip-safe
+kmeans_pytorch.egg-info/top_level.txt
@@ -0,0 +1 @@
+
@@ -0,0 +1,3 @@
+[console_scripts]
+kmeans_pytorch = kmeans_pytorch.main:main
+
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Things to include in the built package (besides the packages defined in setup.py)`
	`2`	`+include README.md`
	`3`	`+include LICENSE`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+[console_scripts]`
	`2`	`+kmeans_pytorch = kmeans_pytorch.main:main`
	`3`	`+`