Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: DIA-1415: LabelStudio skill #227

Merged
merged 15 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions adala/skills/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@
from .collection.entity_extraction import EntityExtraction
from .collection.rag import RAGSkill
from .collection.ontology_creation import OntologyCreator, OntologyMerger
from .collection.label_studio import LabelStudioSkill
from ._base import Skill, TransformSkill, AnalysisSkill, SynthesisSkill
71 changes: 71 additions & 0 deletions adala/skills/collection/label_studio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import logging
from typing import Dict, Any, Type
from functools import cached_property
from adala.skills._base import TransformSkill
from pydantic import BaseModel, Field, model_validator

from adala.runtimes import Runtime, AsyncRuntime
from adala.utils.internal_data import InternalDataFrame

from label_studio_sdk.label_interface import LabelInterface
from label_studio_sdk._extensions.label_studio_tools.core.utils.json_schema import json_schema_to_pydantic


logger = logging.getLogger(__name__)


class LabelStudioSkill(TransformSkill):
    """Transform skill whose output structure comes from a Label Studio labeling config.

    During validation the string in ``label_config`` is parsed with
    ``LabelInterface`` and converted to a JSON schema, which is stored in
    ``field_schema``. ``apply`` and ``aapply`` pass that schema through
    ``json_schema_to_pydantic`` and hand the resulting response model to the
    runtime's ``batch_to_batch`` call.
    """

    name: str = "label_studio"
    input_template: str = "Annotate the input data according to the provided schema."
    # TODO: remove output_template, fix calling @model_validator(mode='after')
    # in the base class
    output_template: str = "Output: {field_name}"
    # Open question from the author: why is validate_response_model called in
    # the base class?
    response_model: Type[BaseModel] = BaseModel
    # ------------------------------
    # Labeling config string that is fed to LabelInterface.
    label_config: str = "<View></View>"

    # TODO: implement postprocessing like in EntityExtractionSkill or to verify Taxonomy

    @model_validator(mode="after")
    def validate_response_model(self):
        """Derive ``field_schema`` from ``label_config`` after model validation.

        Returns:
            The validated instance itself, with ``field_schema`` set to the
            JSON schema produced by ``LabelInterface.to_json_schema()``.
        """
        # Parse first so that a bad config fails before anything is logged.
        parsed_config = LabelInterface(self.label_config)
        logger.debug(f'Read labeling config {self.label_config}')

        # NOTE(review): ``field_schema`` is not declared in this class; it is
        # presumably a field of a base class - confirm.
        json_schema = parsed_config.to_json_schema()
        self.field_schema = json_schema
        logger.debug(f'Converted labeling config to json schema: {self.field_schema}')

        return self

    def _create_response_model_from_field_schema(self):
        """Placeholder that intentionally does nothing and returns ``None``."""
        return None

    def apply(
        self,
        input: InternalDataFrame,
        runtime: Runtime,
    ) -> InternalDataFrame:
        """Run the skill synchronously over a batch.

        Args:
            input: Batch of records to process.
            runtime: Runtime whose ``batch_to_batch`` performs the processing.

        Returns:
            Whatever ``runtime.batch_to_batch`` returns for the batch.
        """
        # The response model is only used inside the context manager, so the
        # runtime call has to happen before the ``with`` block exits.
        with json_schema_to_pydantic(self.field_schema) as response_cls:
            call_kwargs = dict(
                input_template=self.input_template,
                # The output template is deliberately empty here; the
                # response model is passed explicitly instead.
                output_template="",
                instructions_template=self.instructions,
                response_model=response_cls,
            )
            return runtime.batch_to_batch(input, **call_kwargs)

    async def aapply(
        self,
        input: InternalDataFrame,
        runtime: AsyncRuntime,
    ) -> InternalDataFrame:
        """Run the skill asynchronously over a batch.

        Args:
            input: Batch of records to process.
            runtime: Async runtime whose ``batch_to_batch`` is awaited.

        Returns:
            The awaited result of ``runtime.batch_to_batch`` for the batch.
        """
        # Await inside the ``with`` block so the response model stays within
        # the context manager for the whole duration of the call.
        with json_schema_to_pydantic(self.field_schema) as response_cls:
            call_kwargs = dict(
                input_template=self.input_template,
                output_template="",
                instructions_template=self.instructions,
                response_model=response_cls,
            )
            batch_result = await runtime.batch_to_batch(input, **call_kwargs)
            return batch_result
130 changes: 127 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ celery = {version = "^5.3.6", extras = ["redis"]}
kombu = ">=5.4.0rc2" # Pin version to fix https://github.com/celery/celery/issues/8030. TODO: remove once this fix is included in celery
uvicorn = "*"
pydantic-settings = "^2.2.1"
label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/81ab3bbed0373fca1fd9166f186bfd0817512331.zip"}
label-studio-sdk = {url = "https://github.com/HumanSignal/label-studio-sdk/archive/e94072130d90f7b89701b9234ce175425f36f23e.zip"}
kafka-python = "^2.0.2"
# https://github.com/geerlingguy/ansible-role-docker/issues/462#issuecomment-2144121102
requests = "2.31.0"
Expand Down
Loading
Loading