This repository has been archived by the owner on Jun 9, 2024. It is now read-only.
Add gradio-tools plugin #14
Open

freddyaboulton wants to merge 8 commits into Significant-Gravitas:master from freddyaboulton:add-gradio-tools
Changes from 3 commits (of 8 total):

- 9038116 Add gradio-tools (freddyaboulton)
- a8833ac Fixing (freddyaboulton)
- 25cc465 Fix typos (freddyaboulton)
- def0335 Merge branch 'master' of github.com:Significant-Gravitas/Auto-GPT-Plu… (freddyaboulton)
- 1038ab1 Add tests (freddyaboulton)
- 1fae35e Fix typo (freddyaboulton)
- 0645a8e Merge branch 'master' into add-gradio-tools (ntindle)
- 271503a Merge branch 'master' into add-gradio-tools (lc0rp)
```
@@ -10,4 +10,5 @@ twine
 tweepy
 pandas
 auto_gpt_plugin_template
-python-dotenv
+python-dotenv
+gradio_tools>=0.0.5
```
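A quick, hedged way to confirm the new pin resolves locally, using only the standard library:

```python
# Check the installed gradio_tools distribution against the new pin.
from importlib.metadata import version

print(version("gradio_tools"))  # expect a version >= 0.0.5 per this change
```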
@@ -0,0 +1,35 @@

# freddyaboulton/gradio-tools 🤝

A plugin giving AutoGPT access to [Gradio](https://github.com/gradio-app/gradio) spaces running on the [huggingface hub](https://huggingface.co/spaces) and elsewhere!

Integration powered by [gradio-tools](https://github.com/freddyaboulton/gradio-tools)

gradio-tools comes with a set of pre-built tools, but it is easy to add new tools (a sketch of a custom tool follows this README).

All contributions to `gradio-tools` and this plugin are welcome!
## Features

Each tool specified via the env file will add a command that gives AutoGPT the ability to call that Gradio app programmatically and get its prediction.

For example, an LLM could use a Gradio tool to transcribe a voice recording it finds online and then summarize it for you. Or it could use a different Gradio tool to apply OCR to a document on your Google Drive and then answer questions about it.

## Installation

1. Download this repository, and save it as `autogpt-gradio-tools.zip`
2. Place the `.zip` file in the plugins directory of your AutoGPT install
3. Add your gradio-tools configuration to the `.env` file within AutoGPT:

```
################################################################################
### GRADIO-TOOLS
################################################################################

# Comma-separated list of pre-built tools to enable as commands

AUTOGPT_GRADIO_TOOLS=StableDiffusion,ImageCaptioner
GRADIO_TOOLS_HF_TOKEN=<Optional HF token to clone spaces and avoid rate limits>
```
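As referenced in the README, here is a minimal sketch of what a custom tool could look like. It assumes the `GradioTool` base class exposes `create_job`/`postprocess` hooks and a `client` attribute for the underlying Space, which matches how the pre-built tools are structured in gradio-tools at the time of this PR; the tool, Space id, and endpoint below are hypothetical.

```python
from gradio_tools import GradioTool
from gradio_tools.tools.gradio_tool import Job  # same import used in tools.py below


class MyCustomTool(GradioTool):
    """Hypothetical custom tool wrapping an arbitrary Gradio Space."""

    def __init__(self, hf_token=None) -> None:
        super().__init__(
            name="MyCustomTool",
            description="Describe the input and output here so the LLM "
            "knows how to call the command.",
            src="user/space-id",  # hypothetical Hugging Face Space id
            hf_token=hf_token,
        )
        self.args = {"query": "<text input for the space>"}

    def create_job(self, query: str) -> Job:
        # self.client is the gradio client the base class creates (assumed);
        # "/predict" is a placeholder endpoint name.
        return self.client.submit(query, api_name="/predict")

    def postprocess(self, output) -> str:
        # Return a plain string so AutoGPT can consume the result.
        return str(output)
```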
@@ -0,0 +1,239 @@

```python
"""Integrations that expose gradio-tools tools as AutoGPT commands."""
from typing import Any, Dict, List, Optional, Tuple, TypedDict, TypeVar
from dotenv import load_dotenv
from auto_gpt_plugin_template import AutoGPTPluginTemplate
from pathlib import Path
import os
from .tools import (AutoGPTClipInterrogatorTool,
                    AutoGPTStableDiffusion,
                    AutoGPTWhisperTool,
                    AutoGPTTextToVideoTool,
                    AutoGPTCaptioner,
                    AutoGPTPromptGeneratorTool,
                    AutoGPTImageToMusicTool,
                    AutoGPTDocumentAnsweringTool)
from gradio_tools import GradioTool

PromptGenerator = TypeVar("PromptGenerator")


with open(str(Path(os.getcwd()) / ".env"), 'r') as fp:
    load_dotenv(stream=fp)


TOOLS = [
    AutoGPTStableDiffusion(hf_token=os.getenv("GRADIO_TOOLS_HF_TOKEN")),
    AutoGPTCaptioner(hf_token=os.getenv("GRADIO_TOOLS_HF_TOKEN")),
    AutoGPTWhisperTool(hf_token=os.getenv("GRADIO_TOOLS_HF_TOKEN")),
    AutoGPTTextToVideoTool(hf_token=os.getenv("GRADIO_TOOLS_HF_TOKEN")),
    AutoGPTPromptGeneratorTool(hf_token=os.getenv("GRADIO_TOOLS_HF_TOKEN")),
    AutoGPTDocumentAnsweringTool(hf_token=os.getenv("GRADIO_TOOLS_HF_TOKEN")),
    AutoGPTImageToMusicTool(hf_token=os.getenv("GRADIO_TOOLS_HF_TOKEN")),
    AutoGPTClipInterrogatorTool(hf_token=os.getenv("GRADIO_TOOLS_HF_TOKEN"))
]


def get_tool(tool: str) -> GradioTool:
    return next(t for t in TOOLS if t.name == tool)


class Message(TypedDict):
    role: str
    content: str


class AutoGPTGradioTools(AutoGPTPluginTemplate):
    """
    Gradio-tools integrations for AutoGPT.
    """

    def __init__(self):
        super().__init__()
        self._name = "autogpt-gradio-tools"
        self._version = "0.1.0"
        self._description = "Calling Gradio Apps."
        ts = [t for t in os.getenv("AUTOGPT_GRADIO_TOOLS", "").split(",") if t != ""]
```

**Review thread on the `ts = ...` line above:**

> Check if these exist before loading the plugin commands

> you can see how the email plugin does it for context

> Awesome - just pushed up this change!
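The author's reply suggests the check landed in a later commit outside this 3-commit view. As a rough, hedged sketch only (not the code that was pushed), the validation could reuse the module's own `TOOLS` and `get_tool` to skip unknown names instead of letting `next()` raise `StopIteration`:

```python
def select_tools(names: List[str]) -> List[GradioTool]:
    # Hedged sketch of the requested check; the actual fix on the branch
    # may differ. Relies on the TOOLS list and get_tool defined above.
    known = {t.name for t in TOOLS}
    unknown = [n for n in names if n not in known]
    if unknown:
        print(f"autogpt-gradio-tools: ignoring unknown tool(s): {unknown}")
    return [get_tool(n) for n in names if n in known]
```

The file under review continues below.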
```python
        self.tools = [get_tool(t) for t in ts]

    def can_handle_on_response(self) -> bool:
        """This method is called to check that the plugin can
        handle the on_response method.
        Returns:
            bool: True if the plugin can handle the on_response method."""
        return False

    def on_response(self, response: str, *args, **kwargs) -> str:
        """This method is called when a response is received from the model."""
        pass

    def can_handle_post_prompt(self) -> bool:
        """This method is called to check that the plugin can
        handle the post_prompt method.
        Returns:
            bool: True if the plugin can handle the post_prompt method."""
        return True

    def can_handle_on_planning(self) -> bool:
        """This method is called to check that the plugin can
        handle the on_planning method.
        Returns:
            bool: True if the plugin can handle the on_planning method."""
        return False

    def on_planning(
        self, prompt: PromptGenerator, messages: List[str]
    ) -> Optional[str]:
        """This method is called before the planning chat completion is done.
        Args:
            prompt (PromptGenerator): The prompt generator.
            messages (List[str]): The list of messages.
        """
        pass

    def can_handle_post_planning(self) -> bool:
        """This method is called to check that the plugin can
        handle the post_planning method.
        Returns:
            bool: True if the plugin can handle the post_planning method."""
        return False

    def post_planning(self, response: str) -> str:
        """This method is called after the planning chat completion is done.
        Args:
            response (str): The response.
        Returns:
            str: The resulting response.
        """
        pass

    def can_handle_pre_instruction(self) -> bool:
        """This method is called to check that the plugin can
        handle the pre_instruction method.
        Returns:
            bool: True if the plugin can handle the pre_instruction method."""
        return False

    def pre_instruction(self, messages: List[str]) -> List[str]:
        """This method is called before the instruction chat is done.
        Args:
            messages (List[str]): The list of context messages.
        Returns:
            List[str]: The resulting list of messages.
        """
        pass

    def can_handle_on_instruction(self) -> bool:
        """This method is called to check that the plugin can
        handle the on_instruction method.
        Returns:
            bool: True if the plugin can handle the on_instruction method."""
        return False

    def on_instruction(self, messages: List[str]) -> Optional[str]:
        """This method is called when the instruction chat is done.
        Args:
            messages (List[str]): The list of context messages.
        Returns:
            Optional[str]: The resulting message.
        """
        pass

    def can_handle_post_instruction(self) -> bool:
        """This method is called to check that the plugin can
        handle the post_instruction method.
        Returns:
            bool: True if the plugin can handle the post_instruction method."""
        return False

    def post_instruction(self, response: str) -> str:
        """This method is called after the instruction chat is done.
        Args:
            response (str): The response.
        Returns:
            str: The resulting response.
        """
        pass

    def can_handle_pre_command(self) -> bool:
        """This method is called to check that the plugin can
        handle the pre_command method.
        Returns:
            bool: True if the plugin can handle the pre_command method."""
        return False

    def pre_command(
        self, command_name: str, arguments: Dict[str, Any]
    ) -> Tuple[str, Dict[str, Any]]:
        """This method is called before the command is executed.
        Args:
            command_name (str): The command name.
            arguments (Dict[str, Any]): The arguments.
        Returns:
            Tuple[str, Dict[str, Any]]: The command name and the arguments.
        """
        pass

    def can_handle_post_command(self) -> bool:
        """This method is called to check that the plugin can
        handle the post_command method.
        Returns:
            bool: True if the plugin can handle the post_command method."""
        return False

    def post_command(self, command_name: str, response: str) -> str:
        """This method is called after the command is executed.
        Args:
            command_name (str): The command name.
            response (str): The response.
        Returns:
            str: The resulting response.
        """
        pass

    def can_handle_chat_completion(
        self,
        messages: list[Dict[Any, Any]],
        model: str,
        temperature: float,
        max_tokens: int,
    ) -> bool:
        """This method is called to check that the plugin can
        handle the chat_completion method.
        Args:
            messages (Dict[Any, Any]): The messages.
            model (str): The model name.
            temperature (float): The temperature.
            max_tokens (int): The max tokens.
        Returns:
            bool: True if the plugin can handle the chat_completion method."""
        return False

    def handle_chat_completion(
        self,
        messages: list[Dict[Any, Any]],
        model: str,
        temperature: float,
        max_tokens: int,
    ) -> str:
        """This method is called when the chat completion is done.
        Args:
            messages (Dict[Any, Any]): The messages.
            model (str): The model name.
            temperature (float): The temperature.
            max_tokens (int): The max tokens.
        Returns:
            str: The resulting response.
        """
        return None

    def post_prompt(self, prompt: PromptGenerator) -> PromptGenerator:
        """This method is called just after the generate_prompt is called,
        but actually before the prompt is generated.
        Args:
            prompt (PromptGenerator): The prompt generator.
        Returns:
            PromptGenerator: The prompt generator.
        """
        for tool in self.tools:
            prompt.add_command(tool.description, tool.name.lower(), tool.args, tool.run)

        return prompt
```
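For readers unfamiliar with the plugin hooks, here is a rough illustration of what `post_prompt` produces, using a stand-in for AutoGPT's `PromptGenerator`. Only the `add_command(label, name, args, function)` call pattern visible above is assumed, and constructing the plugin requires the `.env` settings plus network access to the Spaces:

```python
class DemoPromptGenerator:
    """Stand-in for AutoGPT's PromptGenerator, for illustration only."""

    def __init__(self):
        self.commands = []

    def add_command(self, label, name, args, function=None):
        self.commands.append({"label": label, "name": name, "args": args})


# With AUTOGPT_GRADIO_TOOLS=StableDiffusion in the .env:
plugin = AutoGPTGradioTools()
generator = plugin.post_prompt(DemoPromptGenerator())
for command in generator.commands:
    print(command["name"], command["args"])
# -> stablediffusion {'prompt': 'text description of image'}
```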
@@ -0,0 +1,107 @@

```python
from gradio_tools import (
    StableDiffusionTool,
    ImageCaptioningTool,
    TextToVideoTool,
    StableDiffusionPromptGeneratorTool,
    WhisperAudioTranscriptionTool,
    DocQueryDocumentAnsweringTool,
    ClipInterrogatorTool,
    ImageToMusicTool,
)
from gradio_tools.tools.gradio_tool import Job
from pathlib import Path
import os


WORKSPACE_DIR = (Path(os.getcwd()) / "auto_gpt_workspace").resolve()


class AutoGPTCaptioner(ImageCaptioningTool):
    def __init__(
        self,
        name="ImageCaptioner",
        description="An image captioner. Use this to create a caption for an image. "
        "Input will be a path to an image file. "
        "The output will be a caption of that image.",
        src="taesiri/BLIP-2",
        hf_token=None,
    ) -> None:
        super().__init__(name, description, src, hf_token)
        self.args = {"img": "<full-path-to-image>"}


class AutoGPTStableDiffusion(StableDiffusionTool):
    def __init__(
        self,
        name="StableDiffusion",
        description="An image generator. Use this to generate images based on "
        "text input. Input should be a description of what the image should "
        "look like. The output will be a path to an image file.",
        src="gradio-client-demos/stable-diffusion",
        hf_token=None,
    ) -> None:
        super().__init__(name, description, src, hf_token)
        self.args = {"prompt": "text description of image"}


class AutoGPTWhisperTool(WhisperAudioTranscriptionTool):
    def __init__(
        self,
        name="Whisper",
        description="A tool for transcribing audio. Use this tool to transcribe an audio file. "
        "Input will be a path to an audio file. "
        "The output will be the text transcript of that file.",
        src="abidlabs/whisper",
        hf_token=None,
    ) -> None:
        super().__init__(name, description, src, hf_token)
        self.args = {"audio": "full path of audio file"}


class AutoGPTTextToVideoTool(TextToVideoTool):
    def __init__(
        self,
        name="TextToVideo",
        description="A tool for creating videos from text. "
        "Use this tool to create videos from text prompts. "
        "Input will be a text prompt describing a video scene. "
        "The output will be a path to a video file.",
        src="damo-vilab/modelscope-text-to-video-synthesis",
        hf_token=None,
    ) -> None:
        super().__init__(name, description, src, hf_token)
        self.args = {"prompt": "text description of video"}


class AutoGPTPromptGeneratorTool(StableDiffusionPromptGeneratorTool):
    def __init__(
        self,
        name="StableDiffusionPromptGenerator",
        description="Use this tool to improve a prompt for stable diffusion and other image generators. "
        "This tool will refine your prompt to include key words and phrases that make "
        "stable diffusion perform better. The input is a prompt text string "
        "and the output is a prompt text string.",
        src="microsoft/Promptist",
        hf_token=None,
    ) -> None:
        super().__init__(name, description, src, hf_token)
        self.args = {"prompt": "text description of image"}


class AutoGPTDocumentAnsweringTool(DocQueryDocumentAnsweringTool):
    def __init__(
        self,
        name="DocQuery",
        description="A tool for answering questions about a document from the image of "
        "the document. Input will be two strings separated by a comma: the first will be "
        "the path or URL to an image of a document, the second will be your question "
        "about the document. The output will be the text answer to your question.",
        src="abidlabs/docquery",
        hf_token=None,
    ) -> None:
        super().__init__(name, description, src, hf_token)
        self.args = {"args": "Two strings separated by a comma: the first is the path or URL to an image of a document, the second is your question about the document."}


class AutoGPTClipInterrogatorTool(ClipInterrogatorTool):
    def __init__(
        self,
        name="ClipInterrogator",
        description="A tool for reverse engineering a prompt from a source image. "
        "Use this tool to create a prompt for StableDiffusion that matches the "
        "input image. The input is a path to an image. The output is a text string.",
        src="pharma/CLIP-Interrogator",
        hf_token=None,
    ) -> None:
        super().__init__(name, description, src, hf_token)
        self.args = {"image": "The full path to the image file"}


class AutoGPTImageToMusicTool(ImageToMusicTool):
    def __init__(
        self,
        name="ImagetoMusic",
        description="A tool for creating music from images. Use this tool to create a musical "
        "track from an image. Input will be a path to an image file. "
        "The output will be an audio file generated from that image.",
        src="fffiloni/img-to-music",
        hf_token=None,
    ) -> None:
        super().__init__(name, description, src, hf_token)
        self.args = {"image": "The full path to the image file"}
```
**Review comment:**

> Fix the docstring