Skip to content

Python: Allow Kernel Functions from Prompt for image and audio content #11403

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions python/samples/concepts/audio/audio_from_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright (c) Microsoft. All rights reserved.

import asyncio

from samples.concepts.audio.audio_player import AudioPlayer
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai import PromptExecutionSettings
from semantic_kernel.connectors.ai.open_ai import OpenAITextToAudio
from semantic_kernel.functions import KernelArguments

"""
This simple sample demonstrates how to use the AzureTextToAudio services
with a prompt and prompt rendering.

Resources required for this sample: an OpenAI text-to-speech model (e.g. tts).

Additional dependencies required for this sample:
- pyaudio: run `pip install pyaudio` or `uv pip install pyaudio` if you are using uv.
"""


async def main():
kernel = Kernel()
kernel.add_service(OpenAITextToAudio(service_id="tts"))

result = await kernel.invoke_prompt(
prompt="speak the following phrase: {{$phrase}}",
arguments=KernelArguments(
phrase="a painting of a flower vase",
settings=PromptExecutionSettings(service_id="tts", voice="coral"),
),
)
if result:
AudioPlayer(audio_content=result.value[0]).play()


if __name__ == "__main__":
asyncio.run(main())
49 changes: 49 additions & 0 deletions python/samples/concepts/images/image_gen_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Copyright (c) Microsoft. All rights reserved.

import asyncio
from urllib.request import urlopen

try:
from PIL import Image

pil_available = True
except ImportError:
pil_available = False

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai import PromptExecutionSettings
from semantic_kernel.connectors.ai.open_ai import OpenAITextToImage
from semantic_kernel.functions import KernelArguments

"""
This sample demonstrates how to use the OpenAI text-to-image service to generate an image from a prompt.
It uses the OpenAITextToImage class to create an image based on the provided prompt and settings.
The generated image is then displayed using the PIL library if available.
"""


async def main():
kernel = Kernel()
kernel.add_service(OpenAITextToImage(service_id="dalle3"))

result = await kernel.invoke_prompt(
prompt="Generate a image of {{$topic}} in the style of a {{$style}}",
arguments=KernelArguments(
topic="a flower vase",
style="painting",
settings=PromptExecutionSettings(
service_id="dalle3",
width=1024,
height=1024,
quality="hd",
style="vivid",
),
),
)
if result and pil_available:
img = Image.open(urlopen(str(result.value[0].uri))) # nosec
img.show()


if __name__ == "__main__":
asyncio.run(main())
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
# Copyright (c) Microsoft. All rights reserved.

import logging
from typing import Literal
from typing import Annotated, Literal

from pydantic import Field, model_validator
from pydantic import Field

from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.exceptions.service_exceptions import ServiceInvalidExecutionSettingsError

logger = logging.getLogger(__name__)

Expand All @@ -18,13 +17,6 @@ class OpenAITextToAudioExecutionSettings(PromptExecutionSettings):
input: str | None = Field(
None, description="Do not set this manually. It is set by the service based on the text content."
)
voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"] = "alloy"
voice: Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"] = "alloy"
response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | None = None
speed: float | None = None

@model_validator(mode="after")
def validate_speed(self) -> "OpenAITextToAudioExecutionSettings":
"""Validate the speed parameter."""
if self.speed is not None and (self.speed < 0.25 or self.speed > 4.0):
raise ServiceInvalidExecutionSettingsError("Speed must be between 0.25 and 4.0.")
return self
speed: Annotated[float | None, Field(ge=0.25, le=4.0)] = None
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,26 @@ class OpenAITextToImageExecutionSettings(PromptExecutionSettings):
quality: str | None = None
style: str | None = None

@model_validator(mode="before")
@classmethod
def get_size(cls, data: dict[str, Any]) -> dict[str, Any]:
"""Check that the requested image size is valid."""
if isinstance(data, dict):
if "size" not in data and "width" in data and "height" in data:
data["size"] = ImageSize(width=data["width"], height=data["height"])
elif "extension_data" in data:
extension_data = data["extension_data"]
if (
isinstance(extension_data, dict)
and "size" not in extension_data
and "width" in extension_data
and "height" in extension_data
):
data["extension_data"]["size"] = ImageSize(
width=extension_data["width"], height=extension_data["height"]
)
return data

@model_validator(mode="after")
def check_size(self) -> "OpenAITextToImageExecutionSettings":
"""Check that the requested image size is valid."""
Expand All @@ -51,16 +71,6 @@ def check_size(self) -> "OpenAITextToImageExecutionSettings":

return self

@model_validator(mode="after")
def check_prompt(self) -> "OpenAITextToImageExecutionSettings":
"""Check that the prompt is not empty."""
prompt = self.prompt or self.extension_data.get("prompt")

if not prompt:
raise ServiceInvalidExecutionSettingsError("The prompt is required.")

return self

def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
"""Prepare the settings dictionary for the OpenAI API."""
settings_dict = super().prepare_settings_dict(**kwargs)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) Microsoft. All rights reserved.

from typing import Any
from warnings import warn

from openai.types.images_response import ImagesResponse

Expand All @@ -11,30 +12,55 @@
from semantic_kernel.connectors.ai.open_ai.services.open_ai_handler import OpenAIHandler
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.connectors.ai.text_to_image_client_base import TextToImageClientBase
from semantic_kernel.exceptions.service_exceptions import ServiceResponseException
from semantic_kernel.exceptions.service_exceptions import ServiceInvalidRequestError, ServiceResponseException


class OpenAITextToImageBase(OpenAIHandler, TextToImageClientBase):
"""OpenAI text to image client."""

async def generate_image(self, description: str, width: int, height: int, **kwargs: Any) -> bytes | str:
async def generate_image(
self,
description: str,
width: int | None = None,
height: int | None = None,
settings: PromptExecutionSettings | None = None,
**kwargs: Any,
) -> bytes | str:
"""Generate image from text.

Args:
description: Description of the image.
width: Width of the image, check the openai documentation for the supported sizes.
height: Height of the image, check the openai documentation for the supported sizes.
width: Deprecated, use settings instead.
height: Deprecated, use settings instead.
settings: Execution settings for the prompt.
kwargs: Additional arguments, check the openai images.generate documentation for the supported arguments.

Returns:
bytes | str: Image bytes or image URL.
"""
settings = OpenAITextToImageExecutionSettings(
prompt=description,
size=ImageSize(width=width, height=height),
ai_model_id=self.ai_model_id,
**kwargs,
)
if not settings:
settings = OpenAITextToImageExecutionSettings(**kwargs)
if not isinstance(settings, OpenAITextToImageExecutionSettings):
settings = OpenAITextToImageExecutionSettings.from_prompt_execution_settings(settings)
if width:
warn("The 'width' argument is deprecated. Use 'settings.size' instead.", DeprecationWarning)
if settings.size and not settings.size.width:
settings.size.width = width
if height:
warn("The 'height' argument is deprecated. Use 'settings.size' instead.", DeprecationWarning)
if settings.size and not settings.size.height:
settings.size.height = height
if not settings.size and width and height:
settings.size = ImageSize(width=width, height=height)

if not settings.prompt:
settings.prompt = description

if not settings.prompt:
raise ServiceInvalidRequestError("Prompt is required.")

if not settings.ai_model_id:
settings.ai_model_id = self.ai_model_id

response = await self._send_request(settings)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def __init__(self, service_id: str | None = None, **kwargs: Any):
@property
def keys(self):
"""Get the keys of the prompt execution settings."""
return self.model_fields.keys()
return self.__class__.model_fields.keys()

def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
"""Prepare the settings as a dictionary for sending to the AI service.
Expand All @@ -86,7 +86,7 @@ def prepare_settings_dict(self, **kwargs) -> dict[str, Any]:
by_alias=True,
)

def update_from_prompt_execution_settings(self, config: _T) -> None:
def update_from_prompt_execution_settings(self, config: "PromptExecutionSettings") -> None:
"""Update the prompt execution settings from a completion config."""
if config.service_id is not None:
self.service_id = config.service_id
Expand All @@ -95,7 +95,7 @@ def update_from_prompt_execution_settings(self, config: _T) -> None:
self.unpack_extension_data()

@classmethod
def from_prompt_execution_settings(cls: type[_T], config: _T) -> _T:
def from_prompt_execution_settings(cls: type[_T], config: "PromptExecutionSettings") -> _T:
"""Create a prompt execution settings from a completion config."""
config.pack_extension_data()
return cls(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,54 @@
from abc import ABC, abstractmethod
from typing import Any

from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.contents.image_content import ImageContent
from semantic_kernel.services.ai_service_client_base import AIServiceClientBase


class TextToImageClientBase(AIServiceClientBase, ABC):
"""Base class for text to image client."""

@abstractmethod
async def generate_image(self, description: str, width: int, height: int, **kwargs: Any) -> bytes | str:
async def generate_image(
self,
description: str,
width: int | None = None,
height: int | None = None,
settings: PromptExecutionSettings | None = None,
**kwargs: Any,
) -> bytes | str:
"""Generate image from text.

Args:
description: Description of the image.
width: Width of the image.
height: Height of the image.
width: Deprecated, use settings instead.
height: Deprecated, use settings instead.
settings: Execution settings for the prompt.
kwargs: Additional arguments.

Returns:
bytes | str: Image bytes or image URL.
"""
raise NotImplementedError

async def get_image_content(
self,
description: str,
settings: PromptExecutionSettings,
**kwargs: Any,
) -> ImageContent:
"""Generate an image from prompt and return an ImageContent.

Args:
description: Description of the image.
settings: Execution settings for the prompt.
kwargs: Additional arguments.

Returns:
ImageContent: Image content.
"""
image = await self.generate_image(description=description, settings=settings, **kwargs)
if isinstance(image, str):
return ImageContent(uri=image)
return ImageContent(data=image)
38 changes: 36 additions & 2 deletions python/semantic_kernel/functions/kernel_function_from_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@
from semantic_kernel.connectors.ai.chat_completion_client_base import ChatCompletionClientBase
from semantic_kernel.connectors.ai.prompt_execution_settings import PromptExecutionSettings
from semantic_kernel.connectors.ai.text_completion_client_base import TextCompletionClientBase
from semantic_kernel.connectors.ai.text_to_audio_client_base import TextToAudioClientBase
from semantic_kernel.connectors.ai.text_to_image_client_base import TextToImageClientBase
from semantic_kernel.const import DEFAULT_SERVICE_NAME
from semantic_kernel.contents.audio_content import AudioContent
from semantic_kernel.contents.chat_history import ChatHistory
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents.image_content import ImageContent
from semantic_kernel.contents.text_content import TextContent
from semantic_kernel.exceptions import FunctionExecutionException, FunctionInitializationError
from semantic_kernel.exceptions.function_exceptions import PromptRenderingException
Expand Down Expand Up @@ -204,6 +208,34 @@ async def _invoke_internal(self, context: FunctionInvocationContext) -> None:
)
return

if isinstance(prompt_render_result.ai_service, TextToImageClientBase):
try:
images = await prompt_render_result.ai_service.get_image_content(
description=unescape(prompt_render_result.rendered_prompt),
settings=prompt_render_result.execution_settings,
)
except Exception as exc:
raise FunctionExecutionException(f"Error occurred while invoking function {self.name}: {exc}") from exc

context.result = self._create_function_result(
completions=[images], arguments=context.arguments, prompt=prompt_render_result.rendered_prompt
)
return

if isinstance(prompt_render_result.ai_service, TextToAudioClientBase):
try:
audio = await prompt_render_result.ai_service.get_audio_content(
text=unescape(prompt_render_result.rendered_prompt),
settings=prompt_render_result.execution_settings,
)
except Exception as exc:
raise FunctionExecutionException(f"Error occurred while invoking function {self.name}: {exc}") from exc

context.result = self._create_function_result(
completions=[audio], arguments=context.arguments, prompt=prompt_render_result.rendered_prompt
)
return

raise ValueError(f"Service `{type(prompt_render_result.ai_service).__name__}` is not a valid AI service")

async def _invoke_internal_stream(self, context: FunctionInvocationContext) -> None:
Expand Down Expand Up @@ -253,7 +285,9 @@ async def _render_prompt(
if prompt_render_context.rendered_prompt is None:
raise PromptRenderingException("Prompt rendering failed, no rendered prompt was returned.")
selected_service: tuple["AIServiceClientBase", PromptExecutionSettings] = context.kernel.select_ai_service(
function=self, arguments=context.arguments
function=self,
arguments=context.arguments,
type=(TextCompletionClientBase, ChatCompletionClientBase) if prompt_render_context.is_streaming else None,
)
return PromptRenderingResult(
rendered_prompt=prompt_render_context.rendered_prompt,
Expand All @@ -268,7 +302,7 @@ async def _inner_render_prompt(self, context: PromptRenderContext) -> None:

def _create_function_result(
self,
completions: list[ChatMessageContent] | list[TextContent],
completions: list[ChatMessageContent] | list[TextContent] | list[ImageContent] | list[AudioContent],
arguments: KernelArguments,
chat_history: ChatHistory | None = None,
prompt: str | None = None,
Expand Down
Loading
Loading