From 78158a8133d87fb1af780399ebf31d91386f2f54 Mon Sep 17 00:00:00 2001 From: Bideri Alec <101627722+alecbideri@users.noreply.github.com> Date: Mon, 29 Dec 2025 21:34:28 +0200 Subject: [PATCH 1/2] =?UTF-8?q?feat(samples):=20add=20research=5Forchestra?= =?UTF-8?q?tion=20multi-model=20sample=20Add=20a=20new=20sample=20demonstr?= =?UTF-8?q?ating=20multi-agent=20orchestration=20with:=20-=20SequentialAge?= =?UTF-8?q?nt=20pipeline=20(Search=20=E2=86=92=20Scrape=20=E2=86=92=20Cura?= =?UTF-8?q?te=20=E2=86=92=20Write)=20-=20Multi-model=20support=20(Gemini?= =?UTF-8?q?=20+=20DeepSeek=20via=20LiteLLM)=20-=20Web=20scraping=20tool=20?= =?UTF-8?q?using=20BeautifulSoup=20-=20Google=20Search=20grounding=20tool?= =?UTF-8?q?=20This=20sample=20shows=20how=20to=20combine=20different=20LLM?= =?UTF-8?q?=20providers=20in=20a=20single=20orchestration=20for=20research?= =?UTF-8?q?=20and=20report=20generation.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../samples/research_orchestration/README.md | 39 ++++ .../research_orchestration/__init__.py | 15 ++ .../samples/research_orchestration/agent.py | 196 ++++++++++++++++++ 3 files changed, 250 insertions(+) create mode 100644 contributing/samples/research_orchestration/README.md create mode 100644 contributing/samples/research_orchestration/__init__.py create mode 100644 contributing/samples/research_orchestration/agent.py diff --git a/contributing/samples/research_orchestration/README.md b/contributing/samples/research_orchestration/README.md new file mode 100644 index 0000000000..ad11d6364f --- /dev/null +++ b/contributing/samples/research_orchestration/README.md @@ -0,0 +1,39 @@ +# Research Orchestration Agent + +A multi-agent research pipeline that combines **Gemini** and **DeepSeek** models to search, curate, and synthesize information from the internet. + +## Architecture + +``` +User Query → SearchAgent (Gemini) → ScraperAgent (DeepSeek) → CuratorAgent (Gemini) → WriterAgent (DeepSeek) → Final Report +``` + +## Agent Configuration + +| Agent | Model | Purpose | +|-------|-------|---------| +| SearchAgent | gemini-2.5-flash | Google Search grounding for finding sources | +| ScraperAgent | deepseek-chat | Extract content from web pages | +| CuratorAgent | gemini-2.5-flash | Filter and organize information | +| WriterAgent | deepseek-chat | Synthesize final report | + +## Requirements + +- `GOOGLE_API_KEY` - For Gemini models +- `DEEPSEEK_API_KEY` - For DeepSeek models +- LiteLLM installed (`pip install litellm`) +- BeautifulSoup installed (`pip install beautifulsoup4 lxml`) + +## Usage + +```bash +# CLI +adk run contributing/samples/research_orchestration + +# Web UI +adk web contributing/samples +``` + +## Example Query + +"What are the latest developments in AI agent frameworks in 2025?" diff --git a/contributing/samples/research_orchestration/__init__.py b/contributing/samples/research_orchestration/__init__.py new file mode 100644 index 0000000000..c48963cdc7 --- /dev/null +++ b/contributing/samples/research_orchestration/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import agent diff --git a/contributing/samples/research_orchestration/agent.py b/contributing/samples/research_orchestration/agent.py new file mode 100644 index 0000000000..2e2ed6a0e2 --- /dev/null +++ b/contributing/samples/research_orchestration/agent.py @@ -0,0 +1,196 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Research Orchestration Agent + +A multi-agent pipeline that searches the internet, curates information, +and synthesizes reports using a mix of Gemini and DeepSeek models. + +Model Configuration: +- SearchAgent: Gemini 2.5 Flash (for Google Search grounding) +- ScraperAgent: DeepSeek (for web content extraction) +- CuratorAgent: Gemini 2.5 Flash (for filtering and organizing) +- WriterAgent: DeepSeek (for final report synthesis) +""" + +import requests +from bs4 import BeautifulSoup + +from google.adk.agents.llm_agent import LlmAgent +from google.adk.agents.sequential_agent import SequentialAgent +from google.adk.tools import google_search +from google.adk.models.lite_llm import LiteLlm + + +# --- Custom Tool: Web Page Scraper --- +def load_web_page(url: str) -> str: + """Fetches the content from a URL and returns the text. + + Args: + url (str): The URL to fetch content from. + + Returns: + str: The extracted text content from the web page. 
+ """ + try: + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' + } + response = requests.get(url, headers=headers, timeout=10) + + if response.status_code == 200: + soup = BeautifulSoup(response.content, 'lxml') + # Remove script and style elements + for script in soup(["script", "style"]): + script.decompose() + text = soup.get_text(separator='\n', strip=True) + # Filter out very short lines + lines = [line for line in text.splitlines() if len(line.split()) > 3] + return '\n'.join(lines[:100]) # Limit to first 100 meaningful lines + else: + return f"Failed to fetch URL: {url} (Status: {response.status_code})" + except Exception as e: + return f"Error fetching URL {url}: {str(e)}" + + +# --- Model Configuration --- +# Gemini for search and curation (uses google_search grounding) +GEMINI_MODEL = "gemini-2.5-flash" + +# DeepSeek for scraping and writing (via LiteLLM) +DEEPSEEK_MODEL = LiteLlm(model="deepseek/deepseek-chat") + + +# --- Agent 1: Search Agent (Gemini) --- +# Uses Google Search grounding to find relevant information +search_agent = LlmAgent( + name="SearchAgent", + model=GEMINI_MODEL, + description="Searches the internet for relevant information using Google Search", + instruction=""" + You are a research assistant specialized in finding information online. + + When given a research topic or question: + 1. Use the google_search tool to find relevant and recent information + 2. Identify the most credible and relevant sources + 3. Summarize what you found with key points + 4. List any important URLs that should be explored further + + Focus on finding factual, up-to-date information from reliable sources. + """, + tools=[google_search], + output_key="search_results", +) + + +# --- Agent 2: Scraper Agent (DeepSeek) --- +# Extracts detailed content from web pages +scraper_agent = LlmAgent( + name="ScraperAgent", + model=DEEPSEEK_MODEL, + description="Extracts and processes detailed content from web pages", + instruction=""" + You are a web content extraction specialist. + + Based on the search results provided: + {search_results} + + Your task: + 1. If specific URLs were mentioned in the search results, use the load_web_page tool to extract their content + 2. Process and clean the extracted content + 3. Identify the most valuable information from each source + 4. Compile all extracted information in an organized manner + + Focus on extracting substantive content, ignoring navigation, ads, and boilerplate text. + """, + tools=[load_web_page], + output_key="scraped_content", +) + + +# --- Agent 3: Curator Agent (Gemini) --- +# Filters, organizes, and validates the gathered information +curator_agent = LlmAgent( + name="CuratorAgent", + model=GEMINI_MODEL, + description="Curates, filters, and organizes research information", + instruction=""" + You are an expert information curator and fact-checker. + + Review all gathered information: + - Search Results: {search_results} + - Scraped Content: {scraped_content} + + Your task: + 1. Remove duplicate or redundant information + 2. Filter out low-quality, outdated, or irrelevant content + 3. Verify consistency across sources + 4. Organize information into logical themes or categories + 5. Identify key insights, patterns, and important findings + 6. Note any conflicting information or gaps in knowledge + + Output a well-organized summary of the curated information ready for report writing. 
+ """, + output_key="curated_info", +) + + +# --- Agent 4: Writer Agent (DeepSeek) --- +# Synthesizes the final comprehensive report +writer_agent = LlmAgent( + name="WriterAgent", + model=DEEPSEEK_MODEL, + description="Writes comprehensive reports from curated research", + instruction=""" + You are an expert research report writer. + + Using the curated information provided: + {curated_info} + + Write a comprehensive, well-structured research report that includes: + + 1. **Executive Summary** - Brief overview of key findings (2-3 sentences) + + 2. **Key Findings** - Main discoveries organized by theme, supported by evidence + + 3. **Analysis** - Your interpretation of what the findings mean + + 4. **Recommendations** (if applicable) - Actionable insights based on the research + + 5. **Sources** - List the key sources that informed this report + + Write in a clear, professional tone. Use bullet points for clarity where appropriate. + Ensure the report is informative, accurate, and actionable. + """, + output_key="final_report", +) + + +# --- Main Pipeline: Sequential Orchestration --- +# Chains the agents: Search → Scrape → Curate → Write +root_agent = SequentialAgent( + name="ResearchOrchestration", + sub_agents=[ + search_agent, # Step 1: Find sources (Gemini + Google Search) + scraper_agent, # Step 2: Extract content (DeepSeek) + curator_agent, # Step 3: Organize & filter (Gemini) + writer_agent, # Step 4: Write report (DeepSeek) + ], + description=( + "A research pipeline that searches the internet, extracts content, " + "curates information, and produces a comprehensive report. " + "Uses Gemini for search/curation and DeepSeek for extraction/writing." + ), +) From a2b45ae3ce07c194bc333d088b5d6f8826697fc4 Mon Sep 17 00:00:00 2001 From: Bideri Alec <101627722+alecbideri@users.noreply.github.com> Date: Sat, 3 Jan 2026 11:29:44 +0200 Subject: [PATCH 2/2] fix: apply autoformat and simplify README per review feedback --- .../samples/research_orchestration/README.md | 33 ++++---- .../samples/research_orchestration/agent.py | 77 ++++++++++--------- 2 files changed, 56 insertions(+), 54 deletions(-) diff --git a/contributing/samples/research_orchestration/README.md b/contributing/samples/research_orchestration/README.md index ad11d6364f..d4e7580173 100644 --- a/contributing/samples/research_orchestration/README.md +++ b/contributing/samples/research_orchestration/README.md @@ -1,30 +1,27 @@ # Research Orchestration Agent -A multi-agent research pipeline that combines **Gemini** and **DeepSeek** models to search, curate, and synthesize information from the internet. +A multi-agent research pipeline demonstrating SequentialAgent orchestration +with multi-model support via LiteLLM. 
## Architecture ``` -User Query → SearchAgent (Gemini) → ScraperAgent (DeepSeek) → CuratorAgent (Gemini) → WriterAgent (DeepSeek) → Final Report +User Query → SearchAgent → ScraperAgent → CuratorAgent → WriterAgent → Report ``` -## Agent Configuration +## Features Demonstrated -| Agent | Model | Purpose | -|-------|-------|---------| -| SearchAgent | gemini-2.5-flash | Google Search grounding for finding sources | -| ScraperAgent | deepseek-chat | Extract content from web pages | -| CuratorAgent | gemini-2.5-flash | Filter and organize information | -| WriterAgent | deepseek-chat | Synthesize final report | +* **SequentialAgent** - Pipeline orchestration pattern +* **LiteLLM integration** - Multi-model support +* **Google Search grounding** - Built-in search tool +* **Custom tools** - Web scraping with BeautifulSoup -## Requirements +## Sample Query -- `GOOGLE_API_KEY` - For Gemini models -- `DEEPSEEK_API_KEY` - For DeepSeek models -- LiteLLM installed (`pip install litellm`) -- BeautifulSoup installed (`pip install beautifulsoup4 lxml`) +* What are the latest developments in AI agent frameworks? +* Research the current state of autonomous agents. -## Usage +## To Run ```bash # CLI @@ -34,6 +31,8 @@ adk run contributing/samples/research_orchestration adk web contributing/samples ``` -## Example Query +## Requirements -"What are the latest developments in AI agent frameworks in 2025?" +* `google-adk` with LiteLLM extension: `pip install google-adk[extensions]` +* BeautifulSoup: `pip install beautifulsoup4 lxml` +* Configure API keys in `.env` for your chosen models diff --git a/contributing/samples/research_orchestration/agent.py b/contributing/samples/research_orchestration/agent.py index 2e2ed6a0e2..1520c866e7 100644 --- a/contributing/samples/research_orchestration/agent.py +++ b/contributing/samples/research_orchestration/agent.py @@ -15,7 +15,7 @@ """ Research Orchestration Agent -A multi-agent pipeline that searches the internet, curates information, +A multi-agent pipeline that searches the internet, curates information, and synthesizes reports using a mix of Gemini and DeepSeek models. Model Configuration: @@ -25,44 +25,45 @@ - WriterAgent: DeepSeek (for final report synthesis) """ -import requests from bs4 import BeautifulSoup - from google.adk.agents.llm_agent import LlmAgent from google.adk.agents.sequential_agent import SequentialAgent -from google.adk.tools import google_search from google.adk.models.lite_llm import LiteLlm +from google.adk.tools import google_search +import requests # --- Custom Tool: Web Page Scraper --- def load_web_page(url: str) -> str: - """Fetches the content from a URL and returns the text. - - Args: - url (str): The URL to fetch content from. - - Returns: - str: The extracted text content from the web page. 
- """ - try: - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' - } - response = requests.get(url, headers=headers, timeout=10) - - if response.status_code == 200: - soup = BeautifulSoup(response.content, 'lxml') - # Remove script and style elements - for script in soup(["script", "style"]): - script.decompose() - text = soup.get_text(separator='\n', strip=True) - # Filter out very short lines - lines = [line for line in text.splitlines() if len(line.split()) > 3] - return '\n'.join(lines[:100]) # Limit to first 100 meaningful lines - else: - return f"Failed to fetch URL: {url} (Status: {response.status_code})" - except Exception as e: - return f"Error fetching URL {url}: {str(e)}" + """Fetches the content from a URL and returns the text. + + Args: + url (str): The URL to fetch content from. + + Returns: + str: The extracted text content from the web page. + """ + try: + headers = { + "User-Agent": ( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" + ) + } + response = requests.get(url, headers=headers, timeout=10) + + if response.status_code == 200: + soup = BeautifulSoup(response.content, "lxml") + # Remove script and style elements + for script in soup(["script", "style"]): + script.decompose() + text = soup.get_text(separator="\n", strip=True) + # Filter out very short lines + lines = [line for line in text.splitlines() if len(line.split()) > 3] + return "\n".join(lines[:100]) # Limit to first 100 meaningful lines + else: + return f"Failed to fetch URL: {url} (Status: {response.status_code})" + except Exception as e: + return f"Error fetching URL {url}: {str(e)}" # --- Model Configuration --- @@ -78,7 +79,9 @@ def load_web_page(url: str) -> str: search_agent = LlmAgent( name="SearchAgent", model=GEMINI_MODEL, - description="Searches the internet for relevant information using Google Search", + description=( + "Searches the internet for relevant information using Google Search" + ), instruction=""" You are a research assistant specialized in finding information online. @@ -123,7 +126,7 @@ def load_web_page(url: str) -> str: # --- Agent 3: Curator Agent (Gemini) --- # Filters, organizes, and validates the gathered information curator_agent = LlmAgent( - name="CuratorAgent", + name="CuratorAgent", model=GEMINI_MODEL, description="Curates, filters, and organizes research information", instruction=""" @@ -183,10 +186,10 @@ def load_web_page(url: str) -> str: root_agent = SequentialAgent( name="ResearchOrchestration", sub_agents=[ - search_agent, # Step 1: Find sources (Gemini + Google Search) - scraper_agent, # Step 2: Extract content (DeepSeek) - curator_agent, # Step 3: Organize & filter (Gemini) - writer_agent, # Step 4: Write report (DeepSeek) + search_agent, # Step 1: Find sources (Gemini + Google Search) + scraper_agent, # Step 2: Extract content (DeepSeek) + curator_agent, # Step 3: Organize & filter (Gemini) + writer_agent, # Step 4: Write report (DeepSeek) ], description=( "A research pipeline that searches the internet, extracts content, "