Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions lib/crewai/src/crewai/crew.py
Original file line number Diff line number Diff line change
Expand Up @@ -1338,7 +1338,18 @@ def _prepare_tools(
api = getattr(agent.llm, "api", None)
supported_types = get_supported_content_types(provider, api)

# Text files are always auto-injected (inlined as text), even
# when the model does not support multimodal input.
text_prefixes = (
"text/",
"application/json",
"application/xml",
"application/x-yaml",
)

def is_auto_injected(content_type: str) -> bool:
    """Return True when a file of this content type is injected automatically.

    Text-like content types always qualify (they are inlined as text);
    anything else qualifies only when it matches one of the content-type
    prefixes in ``supported_types``.
    """
    # str.startswith accepts a tuple, so one call covers every text prefix.
    if content_type.startswith(text_prefixes):
        return True
    for supported_prefix in supported_types:
        if content_type.startswith(supported_prefix):
            return True
    return False

# Only add read_file tool if there are files that need it
Expand Down
113 changes: 106 additions & 7 deletions lib/crewai/src/crewai/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,10 @@


try:
from crewai_files import aformat_multimodal_content, format_multimodal_content
from crewai_files import (
aformat_multimodal_content,
format_multimodal_content,
)

HAS_CREWAI_FILES = True
except ImportError:
Expand Down Expand Up @@ -2039,6 +2042,10 @@ def _process_message_files(self, messages: list[LLMMessage]) -> list[LLMMessage]
For each message with a `files` field, formats the files into
provider-specific content blocks and updates the message content.

Text files (TextFile instances or files with text/* / application/json /
application/xml / application/x-yaml content types) are always inlined
as text content, even when the model does not support multimodal input.

Args:
messages: List of messages that may contain file attachments.

Expand All @@ -2049,11 +2056,55 @@ def _process_message_files(self, messages: list[LLMMessage]) -> list[LLMMessage]
return messages

if not self.supports_multimodal():
if any(msg.get("files") for msg in messages):
# Inline text files as text; reject non-text files
has_non_text = False
for msg in messages:
files = msg.get("files")
if not files:
continue

text_parts: list[str] = []
non_text_files: dict[str, Any] = {}
for name, file_input in files.items():
if self._is_text_file(file_input):
try:
content = file_input.read_text()
text_parts.append(
f"--- Content of file '{name}' ---\n{content}"
)
except Exception:
non_text_files[name] = file_input
else:
non_text_files[name] = file_input

if non_text_files:
has_non_text = True

if text_parts:
existing_content = msg.get("content", "")
inlined = "\n\n".join(text_parts)
if isinstance(existing_content, str):
msg["content"] = (
f"{existing_content}\n\n{inlined}"
if existing_content
else inlined
)
elif isinstance(existing_content, list):
msg["content"] = [
*existing_content,
self.format_text_content(inlined),
]

if non_text_files:
msg["files"] = non_text_files
else:
msg.pop("files", None)

if has_non_text:
raise ValueError(
f"Model '{self.model}' does not support multimodal input, "
"but files were provided via 'input_files'. "
"Use a vision-capable model or remove the file inputs."
"but non-text files were provided via 'input_files'. "
"Use a vision-capable model or remove the non-text file inputs."
)
return messages

Expand Down Expand Up @@ -2090,6 +2141,10 @@ async def _aprocess_message_files(
For each message with a `files` field, formats the files into
provider-specific content blocks and updates the message content.

Text files (TextFile instances or files with text/* / application/json /
application/xml / application/x-yaml content types) are always inlined
as text content, even when the model does not support multimodal input.

Args:
messages: List of messages that may contain file attachments.

Expand All @@ -2100,11 +2155,55 @@ async def _aprocess_message_files(
return messages

if not self.supports_multimodal():
if any(msg.get("files") for msg in messages):
# Inline text files as text; reject non-text files
has_non_text = False
for msg in messages:
files = msg.get("files")
if not files:
continue

text_parts: list[str] = []
non_text_files: dict[str, Any] = {}
for name, file_input in files.items():
if self._is_text_file(file_input):
try:
content = file_input.read_text()
text_parts.append(
f"--- Content of file '{name}' ---\n{content}"
)
except Exception:
non_text_files[name] = file_input
else:
non_text_files[name] = file_input

if non_text_files:
has_non_text = True

if text_parts:
existing_content = msg.get("content", "")
inlined = "\n\n".join(text_parts)
if isinstance(existing_content, str):
msg["content"] = (
f"{existing_content}\n\n{inlined}"
if existing_content
else inlined
)
elif isinstance(existing_content, list):
msg["content"] = [
*existing_content,
self.format_text_content(inlined),
]

if non_text_files:
msg["files"] = non_text_files
else:
msg.pop("files", None)

if has_non_text:
raise ValueError(
f"Model '{self.model}' does not support multimodal input, "
"but files were provided via 'input_files'. "
"Use a vision-capable model or remove the file inputs."
"but non-text files were provided via 'input_files'. "
"Use a vision-capable model or remove the non-text file inputs."
)
return messages

Expand Down
82 changes: 77 additions & 5 deletions lib/crewai/src/crewai/llms/base_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@


try:
from crewai_files import format_multimodal_content
from crewai_files import TextFile, format_multimodal_content

HAS_CREWAI_FILES = True
except ImportError:
Expand Down Expand Up @@ -635,6 +635,10 @@ def _process_message_files(self, messages: list[LLMMessage]) -> list[LLMMessage]
For each message with a `files` field, formats the files into
provider-specific content blocks and updates the message content.

Text files (TextFile instances or files with text/* / application/json /
application/xml / application/x-yaml content types) are always inlined
as text content, even when the model does not support multimodal input.

Args:
messages: List of messages that may contain file attachments.

Expand All @@ -644,12 +648,61 @@ def _process_message_files(self, messages: list[LLMMessage]) -> list[LLMMessage]
if not HAS_CREWAI_FILES:
return messages

if not self.supports_multimodal():
if any(msg.get("files") for msg in messages):
is_multimodal = self.supports_multimodal()

if not is_multimodal:
# Inline text files as text; reject non-text files
has_non_text = False
for msg in messages:
files = msg.get("files")
if not files:
continue

text_parts: list[str] = []
non_text_files: dict[str, Any] = {}
for name, file_input in files.items():
if self._is_text_file(file_input):
try:
content = file_input.read_text()
text_parts.append(
f"--- Content of file '{name}' ---\n{content}"
)
except Exception:
# If reading fails, fall back to tool-based access
non_text_files[name] = file_input
else:
non_text_files[name] = file_input

if non_text_files:
has_non_text = True

# Append inlined text content to the message
if text_parts:
existing_content = msg.get("content", "")
inlined = "\n\n".join(text_parts)
if isinstance(existing_content, str):
msg["content"] = (
f"{existing_content}\n\n{inlined}"
if existing_content
else inlined
)
elif isinstance(existing_content, list):
msg["content"] = [
*existing_content,
self.format_text_content(inlined),
]

# Keep only non-text files (for tool-based access)
if non_text_files:
msg["files"] = non_text_files
else:
msg.pop("files", None)

if has_non_text:
raise ValueError(
f"Model '{self.model}' does not support multimodal input, "
"but files were provided via 'input_files'. "
"Use a vision-capable model or remove the file inputs."
"but non-text files were provided via 'input_files'. "
"Use a vision-capable model or remove the non-text file inputs."
)
return messages

Expand Down Expand Up @@ -680,6 +733,25 @@ def _process_message_files(self, messages: list[LLMMessage]) -> list[LLMMessage]

return messages

@staticmethod
def _is_text_file(file_input: Any) -> bool:
    """Check whether a file input is a text file that can be inlined.

    Args:
        file_input: The file object attached to a message. Only its type
            and its optional ``content_type`` attribute are inspected.

    Returns:
        True for ``TextFile`` instances (when ``crewai_files`` is
        importable), or for inputs whose ``content_type`` media type starts
        with ``text/`` or is one of ``application/json``,
        ``application/xml`` or ``application/x-yaml``. False otherwise,
        including when ``content_type`` is missing, ``None`` or not a
        string.
    """
    # TextFile only exists when the optional import succeeded, so the flag
    # must be checked first to keep the name lookup from failing.
    if HAS_CREWAI_FILES and isinstance(file_input, TextFile):
        return True

    content_type = getattr(file_input, "content_type", None)
    if not isinstance(content_type, str):
        # The attribute may be present but None; such an input cannot be
        # classified as text and must not raise here.
        return False

    # Drop MIME parameters (e.g. "; charset=utf-8") and normalise case so
    # that "application/json; charset=utf-8" is recognised as JSON.
    media_type = content_type.partition(";")[0].strip().lower()
    if media_type.startswith("text/"):
        return True
    return media_type in (
        "application/json",
        "application/xml",
        "application/x-yaml",
    )

@staticmethod
def _validate_structured_output(
response: str,
Expand Down
11 changes: 11 additions & 0 deletions lib/crewai/src/crewai/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,18 @@ def prompt(self) -> str:
api: str | None = getattr(self.agent.llm, "api", None)
supported_types = get_supported_content_types(provider, api)

# Text files are always auto-injected (inlined as text), even
# when the model does not support multimodal input.
text_prefixes = (
"text/",
"application/json",
"application/xml",
"application/x-yaml",
)

def is_auto_injected(content_type: str) -> bool:
    """Return True when a file of this content type is injected automatically.

    Text-like content types always qualify (they are inlined as text);
    anything else qualifies only when it matches one of the content-type
    prefixes in ``supported_types``.
    """
    # str.startswith accepts a tuple, so one call covers every text prefix.
    if content_type.startswith(text_prefixes):
        return True
    for supported_prefix in supported_types:
        if content_type.startswith(supported_prefix):
            return True
    return False

auto_injected_files = {
Expand Down
Loading
Loading