Commit cf18c6a

Merge pull request #505 from guardrails-ai/hf-support
HuggingFace Support
2 parents 67529d1 + 1bf3ee5 commit cf18c6a

3 files changed: +476 −2 lines changed


docs/llm_api_wrappers.md

Lines changed: 103 additions & 1 deletion
@@ -86,7 +86,7 @@ guard = gd.Guard.from_rail(...)
 anthropic_client = Anthropic(api_key="my_api_key")

 # Wrap Anthropic API call
-raw_llm_output, guardrail_output = guard(
+raw_llm_output, guardrail_output, *rest = guard(
     anthropic_client.completions.create,
     prompt_params={
         "prompt_param_1": "value_1",
@@ -100,6 +100,108 @@ raw_llm_output, guardrail_output = guard(
 ```


+## Hugging Face
+
+### Text Generation Models
+```py
+from guardrails import Guard
+from guardrails.validators import ValidLength, ToxicLanguage
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+# Create your prompt or starting text
+prompt = "Hello, I'm a language model,"
+
+# Setup torch
+torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# Instantiate your tokenizer
+tokenizer = AutoTokenizer.from_pretrained("gpt2")
+
+# Instantiate your model
+model = AutoModelForCausalLM.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id).to(torch_device)
+
+# Customize your model inputs if desired.
+# If you don't pass any inputs (`input_ids`, `input_values`, `input_features`, or `pixel_values`),
+# we'll try to do something similar to the call below using the tokenizer and the prompt.
+# We strongly suggest passing in your own inputs.
+model_inputs = tokenizer(prompt, return_tensors="pt").to(torch_device)
+
+
+# Create the Guard
+guard = Guard.from_string(
+    validators=[
+        ValidLength(
+            min=48,
+            on_fail="fix"
+        ),
+        ToxicLanguage(
+            on_fail="fix"
+        )
+    ],
+    prompt=prompt
+)
+
+# Run the Guard
+response = guard(
+    llm_api=model.generate,
+    max_new_tokens=40,
+    tokenizer=tokenizer,
+    **model_inputs,
+)
+
+# Check the output
+if response.validation_passed:
+    print("validated_output: ", response.validated_output)
+else:
+    print("error: ", response.error)
+
+```
+
+### Pipelines
+```py
+from guardrails import Guard
+from guardrails.validators import ValidLength, ToxicLanguage
+import torch
+from transformers import pipeline
+
+
+# Create your prompt or starting text
+prompt = "What are we having for dinner?"
+
+# Setup pipeline
+generator = pipeline("text-generation", model="facebook/opt-350m")
+
+
+# Create the Guard
+guard = Guard.from_string(
+    validators=[
+        ValidLength(
+            min=48,
+            on_fail="fix"
+        ),
+        ToxicLanguage(
+            on_fail="fix"
+        )
+    ],
+    prompt=prompt
+)
+
+# Run the Guard
+response = guard(
+    llm_api=generator,
+    max_new_tokens=40
+)
+
+if response.validation_passed:
+    print("validated_output: ", response.validated_output)
+else:
+    print("error: ", response.error)
+
+```
+
+
 ## Using Manifest
 [Manifest](https://github.com/HazyResearch/manifest) is a wrapper around most model APIs and supports hosting local models. It can be used as a LLM API.

guardrails/llm_providers.py

Lines changed: 156 additions & 0 deletions
@@ -2,6 +2,7 @@

 from pydantic import BaseModel

+from guardrails.utils.exception_utils import UserFacingException
 from guardrails.utils.llm_response import LLMResponse
 from guardrails.utils.openai_utils import (
     AsyncOpenAIClient,
@@ -12,6 +13,7 @@
     get_static_openai_create_func,
 )
 from guardrails.utils.pydantic_utils import convert_pydantic_model_to_openai_fn
+from guardrails.utils.safe_get import safe_get


 class PromptCallableException(Exception):
@@ -287,6 +289,124 @@ def _invoke_llm(
         return LLMResponse(output=anthropic_response.completion)


+class HuggingFaceModelCallable(PromptCallableBase):
+    def _invoke_llm(
+        self, prompt: str, model_generate: Any, *args, **kwargs
+    ) -> LLMResponse:
+        try:
+            import transformers  # noqa: F401 # type: ignore
+        except ImportError:
+            raise PromptCallableException(
+                "The `transformers` package is not installed. "
+                "Install with `pip install transformers`"
+            )
+        try:
+            import torch
+        except ImportError:
+            raise PromptCallableException(
+                "The `torch` package is not installed. "
+                "Install with `pip install torch`"
+            )
+
+        tokenizer = kwargs.pop("tokenizer")
+        if not tokenizer:
+            raise UserFacingException(
+                ValueError(
+                    "'tokenizer' must be provided in order to use Hugging Face models!"
+                )
+            )
+
+        torch_device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        return_tensors = kwargs.pop("return_tensors", "pt")
+        skip_special_tokens = kwargs.pop("skip_special_tokens", True)
+
+        input_ids = kwargs.pop("input_ids", None)
+        input_values = kwargs.pop("input_values", None)
+        input_features = kwargs.pop("input_features", None)
+        pixel_values = kwargs.pop("pixel_values", None)
+        model_inputs = kwargs.pop("model_inputs", {})
+        if (
+            input_ids is None
+            and input_values is None
+            and input_features is None
+            and pixel_values is None
+            and not model_inputs
+        ):
+            model_inputs = tokenizer(prompt, return_tensors=return_tensors).to(
+                torch_device
+            )
+        else:
+            model_inputs["input_ids"] = input_ids
+            model_inputs["input_values"] = input_values
+            model_inputs["input_features"] = input_features
+            model_inputs["pixel_values"] = pixel_values
+
+        do_sample = kwargs.pop("do_sample", None)
+        temperature = kwargs.pop("temperature", None)
+        if not do_sample and temperature == 0:
+            temperature = None
+
+        model_inputs["do_sample"] = do_sample
+        model_inputs["temperature"] = temperature
+
+        output = model_generate(
+            **model_inputs,
+            **kwargs,
+        )
+
+        # NOTE: This is currently restricted to single outputs
+        # Should we choose to support multiple return sequences,
+        # We would need to either validate all of them
+        # and choose the one with the least failures,
+        # or accept a selection function
+        decoded_output = tokenizer.decode(
+            output[0], skip_special_tokens=skip_special_tokens
+        )
+
+        return LLMResponse(output=decoded_output)
+
+
+class HuggingFacePipelineCallable(PromptCallableBase):
+    def _invoke_llm(self, prompt: str, pipeline: Any, *args, **kwargs) -> LLMResponse:
+        try:
+            import transformers  # noqa: F401 # type: ignore
+        except ImportError:
+            raise PromptCallableException(
+                "The `transformers` package is not installed. "
+                "Install with `pip install transformers`"
+            )
+        try:
+            import torch  # noqa: F401 # type: ignore
+        except ImportError:
+            raise PromptCallableException(
+                "The `torch` package is not installed. "
+                "Install with `pip install torch`"
+            )
+
+        content_key = kwargs.pop("content_key", "generated_text")
+
+        temperature = kwargs.pop("temperature", None)
+        if temperature == 0:
+            temperature = None
+
+        output = pipeline(
+            prompt,
+            temperature=temperature,
+            *args,
+            **kwargs,
+        )
+
+        # NOTE: This is currently restricted to single outputs
+        # Should we choose to support multiple return sequences,
+        # We would need to either validate all of them
+        # and choose the one with the least failures,
+        # or accept a selection function
+        content = safe_get(output[0], content_key)
+
+        return LLMResponse(output=content)
+
+
 class ArbitraryCallable(PromptCallableBase):
     def __init__(self, llm_api: Callable, *args, **kwargs):
         self.llm_api = llm_api
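For reference, the fallback path in `HuggingFaceModelCallable` and the unwrapping in `HuggingFacePipelineCallable` reduce to the plain `transformers` calls below. This is a sketch, not part of the commit; it assumes the `gpt2` and `facebook/opt-350m` checkpoints used in the docs above and the standard text-generation pipeline output shape (a list of dicts keyed by `generated_text`).

```py
# Sketch (not from this commit): the raw transformers calls the two callables wrap.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# HuggingFaceModelCallable fallback path: tokenize the prompt, generate, decode output[0].
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2", pad_token_id=tokenizer.eos_token_id).to(device)
model_inputs = tokenizer("Hello, I'm a language model,", return_tensors="pt").to(device)
output = model.generate(**model_inputs, max_new_tokens=40)
print(tokenizer.decode(output[0], skip_special_tokens=True))

# HuggingFacePipelineCallable path: a text-generation pipeline returns a list of dicts,
# so the callable reads output[0]["generated_text"] (via safe_get) as the response text.
generator = pipeline("text-generation", model="facebook/opt-350m")
output = generator("What are we having for dinner?", max_new_tokens=40)
print(output[0]["generated_text"])
```

In both cases the callable returns an `LLMResponse` whose `output` is a plain string, which is what the Guard's validators operate on.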
@@ -364,6 +484,42 @@ def get_llm_ask(llm_api: Callable, *args, **kwargs) -> PromptCallableBase:
     except ImportError:
         pass

+    try:
+        from transformers import (  # noqa: F401 # type: ignore
+            FlaxPreTrainedModel,
+            GenerationMixin,
+            PreTrainedModel,
+            TFPreTrainedModel,
+        )
+
+        api_self = getattr(llm_api, "__self__", None)
+
+        if (
+            isinstance(api_self, PreTrainedModel)
+            or isinstance(api_self, TFPreTrainedModel)
+            or isinstance(api_self, FlaxPreTrainedModel)
+        ):
+            if (
+                hasattr(llm_api, "__func__")
+                and llm_api.__func__ == GenerationMixin.generate
+            ):
+                return HuggingFaceModelCallable(*args, model_generate=llm_api, **kwargs)
+            raise ValueError("Only text generation models are supported at this time.")
+    except ImportError:
+        pass
+    try:
+        from transformers import Pipeline  # noqa: F401 # type: ignore
+
+        if isinstance(llm_api, Pipeline):
+            # Couldn't find a constant for this
+            if llm_api.task == "text-generation":
+                return HuggingFacePipelineCallable(*args, pipeline=llm_api, **kwargs)
+            raise ValueError(
+                "Only text generation pipelines are supported at this time."
+            )
+    except ImportError:
+        pass
+
     # Let the user pass in an arbitrary callable.
     return ArbitraryCallable(*args, llm_api=llm_api, **kwargs)
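The dispatch above identifies a Hugging Face model by introspecting the bound method the user passes as `llm_api`. A minimal sketch of that mechanism, using stand-in classes rather than the real `transformers` classes so it runs without any model download:

```py
# Sketch (not from this commit): how __self__/__func__ identify a bound generate method.
class GenerationMixinStandIn:            # stands in for transformers.GenerationMixin
    def generate(self, **kwargs):
        return ["generated token ids"]

class PreTrainedModelStandIn(GenerationMixinStandIn):  # stands in for a PreTrainedModel subclass
    pass

model = PreTrainedModelStandIn()
llm_api = model.generate                 # what a user would pass to guard(...)

# __self__ recovers the instance, so the isinstance(...) checks against the
# PreTrainedModel family work; __func__ recovers the underlying function, so it
# can be compared against GenerationMixin.generate to confirm this is text generation.
assert getattr(llm_api, "__self__", None) is model
assert llm_api.__func__ is GenerationMixinStandIn.generate
```

Anything that fails these checks but is a transformers `Pipeline` falls through to the `isinstance(llm_api, Pipeline)` branch; everything else still becomes an `ArbitraryCallable`.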