Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/superannotate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sys


__version__ = "4.4.23"
__version__ = "4.4.24dev2"

sys.path.append(os.path.split(os.path.realpath(__file__))[0])

Expand Down
44 changes: 44 additions & 0 deletions src/superannotate/lib/app/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,50 @@ def get_s3_annotation_paths(folder_path, s3_bucket, annotation_paths, recursive)
return list(set(annotation_paths))


def convert_column_to_lowercase(df, column_name):
    """Rename the first column of ``df`` that matches ``column_name``
    case-insensitively so that it is named exactly ``column_name``.

    The input frame is not mutated; a renamed copy is returned.

    :param df: input DataFrame
    :param column_name: target column name (matched case-insensitively)
    :return: DataFrame with the matched column renamed to ``column_name``
    :raises Exception: if no column matches
    """
    # Sentinel instead of None/truthiness: a column literally named ""
    # (or another falsy label) is still a valid match and must not raise.
    _missing = object()
    actual_column_name = next(
        # str() guards against non-string column labels (e.g. ints),
        # which would otherwise crash on .lower().
        (col for col in df.columns if str(col).lower() == column_name.lower()),
        _missing,
    )
    if actual_column_name is _missing:
        raise Exception(f"Column '{column_name}' not found.")
    return df.rename(columns={actual_column_name: column_name})


def truncate_long_names(name, length=120):
    """Return ``name`` clipped to at most ``length`` characters."""
    return name if len(name) <= length else name[:length]


def get_gen_ai_csv_data(csv_path):
    """Load a GenAI attachment CSV into a list of per-row dicts.

    Processing steps:
      * read every cell as a string,
      * drop a ``_folder`` column if present,
      * locate the ``_item_name`` column case-insensitively,
      * replace empty cells with empty strings,
      * truncate long item names and generate a UUID for blank ones.

    :param csv_path: path to the CSV file
    :return: list of dicts, one per CSV row
    """

    def _normalize_name(val):
        # Blank names get a generated UUID; all names are length-clipped.
        if not str(val).strip():
            val = str(uuid.uuid4())
        return truncate_long_names(val)

    frame = pd.read_csv(csv_path, engine="python", quotechar='"', dtype=str)
    frame = frame.drop(columns=["_folder"], errors="ignore")
    frame = convert_column_to_lowercase(frame, "_item_name")
    frame = frame.fillna("")
    frame["_item_name"] = frame["_item_name"].apply(_normalize_name)
    return frame.to_dict(orient="records")


def get_name_url_duplicated_from_csv(csv_path):
image_data = pd.read_csv(csv_path, dtype=str)
image_data.replace({pd.NA: None}, inplace=True)
Expand Down
151 changes: 100 additions & 51 deletions src/superannotate/lib/app/interface/sdk_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import lib.core as constants
from lib.app.helpers import get_annotation_paths
from lib.app.helpers import get_name_url_duplicated_from_csv
from lib.app.helpers import get_gen_ai_csv_data
from lib.app.helpers import wrap_error as wrap_validation_errors
from lib.app.interface.base_interface import BaseInterfaceFacade
from lib.app.interface.base_interface import TrackableMeta
Expand All @@ -45,6 +46,7 @@
from lib.core.conditions import Condition
from lib.core.conditions import EmptyCondition
from lib.core.entities import AttachmentEntity
from lib.core.entities import GenAIAttachmentEntity
from lib.core.entities import WorkflowEntity
from lib.core.entities import SettingEntity
from lib.core.entities.classes import AnnotationClassEntity
Expand Down Expand Up @@ -112,6 +114,12 @@ class Attachment(TypedDict, total=False):
integration: NotRequired[str] # noqa


class GenAIAttachment(TypedDict, total=False):
    """Shape of a single GenAI attachment passed to ``attach_items``.

    All keys are optional (``total=False``).
    NOTE(review): the ``attach_items`` docstring example uses the key
    ``_category`` while this type declares ``_item_category`` — confirm
    which spelling the backend expects.
    """

    _item_name: Optional[str]
    _item_category: Optional[str]
    # Remaining keys form a component-id -> value map.


class SAClient(BaseInterfaceFacade, metaclass=TrackableMeta):
"""Create SAClient instance to authorize SDK in a team scope.
In case of no argument has been provided, SA_TOKEN environmental variable
Expand Down Expand Up @@ -2653,7 +2661,7 @@ def search_items(
def attach_items(
self,
project: Union[NotEmptyStr, dict],
attachments: Union[NotEmptyStr, Path, conlist(Attachment, min_items=1)],
attachments: Union[NotEmptyStr, Path, List[dict]],
annotation_status: Optional[ANNOTATION_STATUS] = "NotStarted",
):
"""Link items from external storage to SuperAnnotate using URLs.
Expand Down Expand Up @@ -2701,65 +2709,106 @@ def attach_items(
}
]
)

Example of attaching items for GenAI projects:
::

client = SAClient()
client.attach_items(
project="Medical Annotations",
attachments=[
{
"_item_name": "item",
"_category": "heart",
"category_text_input": "value1",
"category_numeric_input": 5,
"category_approve_input": 0,
"category_rating_input": 4,
"category_slider_input": 23,
"category_multiselect": ["Option 1"]
"category_checkbox_input": ["Option 1","Option 3"],
}
]
)
"""

project_name, folder_name = extract_project_folder(project)
try:
attachments = parse_obj_as(List[AttachmentEntity], attachments)
unique_attachments = set(attachments)
duplicate_attachments = [
item
for item, count in collections.Counter(attachments).items()
if count > 1
]
except ValidationError:
(
unique_attachments,
duplicate_attachments,
) = get_name_url_duplicated_from_csv(attachments)
if duplicate_attachments:
logger.info("Dropping duplicates.")
unique_attachments = parse_obj_as(List[AttachmentEntity], unique_attachments)
project, folder = self.controller.get_project_folder(project_name, folder_name)
uploaded, fails, duplicated = [], [], []
_unique_attachments = []
if any(i.integration for i in unique_attachments):
integtation_item_map = {
i.name: i
for i in self.controller.integrations.list().data
if i.type == IntegrationTypeEnum.CUSTOM
}
invalid_integrations = set()
for attachment in unique_attachments:
if attachment.integration:
if attachment.integration in integtation_item_map:
attachment.integration_id = integtation_item_map[
attachment.integration
].id
else:
invalid_integrations.add(attachment.integration)
continue
_unique_attachments.append(attachment)
if invalid_integrations:
logger.error(
f"The ['{','.join(invalid_integrations)}'] integrations specified for the items doesn't exist in the "
"list of integrations on the platform. Any associated items will be skipped."
if project.type == ProjectType.GEN_AI.value:
if isinstance(attachments, (str, Path)):
attachments = parse_obj_as(
List[GenAIAttachmentEntity],
get_gen_ai_csv_data(csv_path=attachments),
)
else:
_unique_attachments = unique_attachments

if _unique_attachments:
logger.info(
f"Attaching {len(_unique_attachments)} file(s) to project {project}."
)
project, folder = self.controller.get_project_folder(
project_name, folder_name
)
response = self.controller.items.attach(
else:
attachments = parse_obj_as(List[GenAIAttachmentEntity], attachments)
response = self.controller.items.attach_gen_ai_data(
project=project,
folder=folder,
attachments=_unique_attachments,
attachments=attachments,
annotation_status=annotation_status,
user=self.controller.current_user,
)
uploaded, duplicated, failed = response.data
else:
try:
attachments = parse_obj_as(List[AttachmentEntity], attachments)
unique_attachments = set(attachments)
duplicate_attachments = [
item
for item, count in collections.Counter(attachments).items()
if count > 1
]
except ValidationError:
(
unique_attachments,
duplicate_attachments,
) = get_name_url_duplicated_from_csv(attachments)
if duplicate_attachments:
logger.info("Dropping duplicates.")
unique_attachments = parse_obj_as(
List[AttachmentEntity], unique_attachments
)
_unique_attachments = []
if any(i.integration for i in unique_attachments):
integtation_item_map = {
i.name: i
for i in self.controller.integrations.list().data
if i.type == IntegrationTypeEnum.CUSTOM
}
invalid_integrations = set()
for attachment in unique_attachments:
if attachment.integration:
if attachment.integration in integtation_item_map:
attachment.integration_id = integtation_item_map[
attachment.integration
].id
else:
invalid_integrations.add(attachment.integration)
continue
_unique_attachments.append(attachment)
if invalid_integrations:
logger.error(
f"The ['{','.join(invalid_integrations)}'] integrations specified for the items doesn't exist in the "
"list of integrations on the platform. Any associated items will be skipped."
)
else:
_unique_attachments = unique_attachments

if _unique_attachments:
logger.info(
f"Attaching {len(_unique_attachments)} file(s) to project {project}."
)
project, folder = self.controller.get_project_folder(
project_name, folder_name
)
response = self.controller.items.attach(
project=project,
folder=folder,
attachments=_unique_attachments,
annotation_status=annotation_status,
)
if response.errors:
raise AppException(response.errors)
uploaded, duplicated = response.data
Expand Down
2 changes: 2 additions & 0 deletions src/superannotate/lib/core/entities/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from lib.core.entities.items import VideoEntity
from lib.core.entities.project import AttachmentEntity
from lib.core.entities.project import ContributorEntity
from lib.core.entities.project import GenAIAttachmentEntity
from lib.core.entities.project import MLModelEntity
from lib.core.entities.project import ProjectEntity
from lib.core.entities.project import SettingEntity
Expand All @@ -37,6 +38,7 @@
"DocumentEntity",
# Utils
"AttachmentEntity",
"GenAIAttachmentEntity",
# project
"ProjectEntity",
"ContributorEntity",
Expand Down
23 changes: 23 additions & 0 deletions src/superannotate/lib/core/entities/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,29 @@ def __hash__(self):
return hash(self.name)


class GenAIAttachmentEntity(BaseModel):
    """Attachment payload for items in GenAI projects.

    NOTE(review): pydantic normally treats underscore-prefixed class
    attributes as private attributes rather than model fields; this class
    appears to rely on the non-standard ``include_private_fields`` config
    flag to keep ``_item_name``/``_item_category`` as real fields —
    confirm against the project's pydantic customization.
    """

    # Item name; defaults to a freshly generated UUID string.
    _item_name: Optional[str] = Field(default_factory=lambda: str(uuid.uuid4()))
    # Optional category label for the item.
    _item_category: Optional[str] = None

    # Optional custom-integration name and its resolved platform id.
    integration: Optional[str] = None
    integration_id: Optional[int] = None

    @property
    def name(self):
        # Public accessor for the underscore-prefixed name field.
        return self._item_name

    @property
    def item_categoty(self):
        # NOTE(review): "categoty" is a typo, but it is public API —
        # renaming would break any caller using this spelling.
        return self._item_category

    class Config:
        include_private_fields = True
        # Arbitrary extra keys (component-id/value pairs) are accepted.
        extra = Extra.allow

    def __hash__(self):
        # Hash by item name (mirrors AttachmentEntity) so entities can
        # be deduplicated via sets.
        return hash(self.name)


class WorkflowEntity(BaseModel):
id: Optional[int]
project_id: Optional[int]
Expand Down
1 change: 1 addition & 0 deletions src/superannotate/lib/core/reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ def start_progress(
description: str = "Processing",
disable=False,
):
disable = disable or not self._log_info
self.progress_bar = self.get_progress_bar(iterations, description, disable)

@staticmethod
Expand Down
24 changes: 22 additions & 2 deletions src/superannotate/lib/core/serviceproviders.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
from lib.core.service_types import UserLimitsResponse
from lib.core.service_types import UserResponse
from lib.core.types import Attachment
from lib.core.types import AttachmentMeta


class BaseClient(ABC):
Expand Down Expand Up @@ -154,6 +153,26 @@ def upload_priority_scores(
) -> ServiceResponse:
raise NotImplementedError

    @abstractmethod
    def list_categories(
        self,
        project_id: int,
    ):
        """Abstract: fetch the categories defined for ``project_id``.

        Concrete service providers must implement the actual API call.
        """
        raise NotImplementedError

    @abstractmethod
    def create_categories(self, project_id: int, categories: List[str]):
        """Abstract: create the given category names in ``project_id``."""
        raise NotImplementedError

    @abstractmethod
    def attach_categories(
        self,
        project_id: int,
        folder_id: int,
        # Maps item id -> category payload dict; exact payload schema is
        # defined by the implementing service provider.
        item_id_category_id_map: Dict[int, dict],
    ):
        """Abstract: associate categories with items in a project folder."""
        raise NotImplementedError


class BaseFolderService(SuperannotateServiceProvider):
@abstractmethod
Expand Down Expand Up @@ -262,7 +281,7 @@ def attach(
attachments: List[Attachment],
annotation_status_code,
upload_state_code,
meta: Dict[str, AttachmentMeta],
meta: Dict[str, dict],
) -> ServiceResponse:
raise NotImplementedError

Expand Down Expand Up @@ -376,6 +395,7 @@ async def upload_small_annotations(
project: entities.ProjectEntity,
folder: entities.FolderEntity,
items_name_data_map: Dict[str, dict],
transform_version: str = None,
) -> UploadAnnotationsResponse:
raise NotImplementedError

Expand Down
Loading