-
Notifications
You must be signed in to change notification settings - Fork 3.3k
/
Copy pathresearch_agent.py
331 lines (280 loc) · 12.4 KB
/
research_agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
import asyncio
import os
import re
import uuid
from datetime import datetime
from typing import Optional

import streamlit as st
from agents import (
    Agent,
    Runner,
    WebSearchTool,
    function_tool,
    handoff,
    trace,
)
from dotenv import load_dotenv
from pydantic import BaseModel
# Read settings from a .env file (if any) into the environment
load_dotenv()

# Configure the browser tab and overall page layout
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="OpenAI Researcher Agent",
    page_icon="📰",
)

# Without an API key nothing below can work: show an error and halt the script
if not os.getenv("OPENAI_API_KEY"):
    st.error("Please set your OPENAI_API_KEY environment variable")
    st.stop()

# Page header and short description of the app
st.title("📰 OpenAI Researcher Agent")
st.subheader("Powered by OpenAI Agents SDK")
st.markdown("""
This app demonstrates the power of OpenAI's Agents SDK by creating a multi-agent system
that researches news topics and generates comprehensive research reports.
""")
# Define data models
# Structured research plan; passed as `output_type` to the Triage Agent.
# A `#` comment is used instead of a class docstring on purpose: a docstring
# would presumably end up in the JSON schema generated from this model.
class ResearchPlan(BaseModel):
    # Statement of the research topic
    topic: str
    # Search queries to run while gathering information
    search_queries: list[str]
    # Aspects of the topic to investigate
    focus_areas: list[str]
# Structured final report; passed as `output_type` to the Editor Agent and
# rendered field by field in the "Report" tab.
class ResearchReport(BaseModel):
    # Report title (also used to build the download file name)
    title: str
    # Section headings, shown as a numbered outline
    outline: list[str]
    # Full report body; rendered with st.markdown
    report: str
    # Sources, shown as a numbered list
    sources: list[str]
    # Word count as reported by the agent; it is displayed, never recomputed here
    word_count: int
# Custom tool for saving facts found during research
@function_tool
def save_important_fact(fact: str, source: Optional[str] = None) -> str:
    """Save an important fact discovered during research.

    Args:
        fact: The important fact to save
        source: Optional source of the fact

    Returns:
        Confirmation message
    """
    # NOTE(review): the docstring above is presumably what `function_tool`
    # exposes to the model as the tool description, so treat it as runtime text.
    # `source` is annotated Optional so that the declared type matches the
    # `None` default and the "Not specified" fallback below.

    # The list may be missing if the tool runs before the page initialised it
    if "collected_facts" not in st.session_state:
        st.session_state.collected_facts = []

    st.session_state.collected_facts.append({
        "fact": fact,
        "source": source or "Not specified",
        # Local wall-clock time at which the fact was recorded
        "timestamp": datetime.now().strftime("%H:%M:%S"),
    })

    return f"Fact saved: {fact}"
# Define the agents
# Agent that searches the web for a term and summarises what it finds. It can
# also record notable facts through the `save_important_fact` tool.
research_agent = Agent(
    name="Research Agent",
    # Adjacent string literals are concatenated verbatim, so every segment but
    # the last must end with a space; otherwise words run together in the prompt
    # (e.g. "andproduce", "300words").
    instructions=(
        "You are a research assistant. Given a search term, you search the web for that term and "
        "produce a concise summary of the results. The summary must be 2-3 paragraphs and less than 300 "
        "words. Capture the main points. Write succintly, no need to have complete sentences or good "
        "grammar. This will be consumed by someone synthesizing a report, so its vital you capture the "
        "essence and ignore any fluff. Do not include any additional commentary other than the summary "
        "itself."
    ),
    model="gpt-4o-mini",
    tools=[
        WebSearchTool(),
        save_important_fact,
    ],
)
# Agent that turns the gathered research into a structured ResearchReport
editor_agent = Agent(
    name="Editor Agent",
    handoff_description="A senior researcher who writes comprehensive research reports",
    model="gpt-4o-mini",
    output_type=ResearchReport,
    # One prompt string assembled from adjacent literals; "\n" separates its paragraphs
    instructions=(
        "You are a senior researcher tasked with writing a cohesive report for a research query. "
        "You will be provided with the original query, and some initial research done by a "
        "research assistant.\n"
        "You should first come up with an outline for the report that describes the structure "
        "and flow of the report. "
        "Then, generate the report and return that as your final output.\n"
        "The final output should be in markdown format, and it should be lengthy and detailed. "
        "Aim for 5-10 pages of content, at least 1000 words."
    ),
)
# Entry-point agent: produces a ResearchPlan and may hand off to the research
# and editor agents
triage_agent = Agent(
    name="Triage Agent",
    model="gpt-4o-mini",
    output_type=ResearchPlan,
    handoffs=[handoff(target) for target in (research_agent, editor_agent)],
    instructions="""You are the coordinator of this research operation. Your job is to:
1. Understand the user's research topic
2. Create a research plan with the following elements:
- topic: A clear statement of the research topic
- search_queries: A list of 3-5 specific search queries that will help gather information
- focus_areas: A list of 3-5 key aspects of the topic to investigate
3. Hand off to the Research Agent to collect information
4. After research is complete, hand off to the Editor Agent who will write a comprehensive report
Make sure to return your plan in the expected structured format with topic, search_queries, and focus_areas.
""",
)
# Create sidebar for input and controls
# Sidebar: free-text topic input, start button and a few one-click examples
with st.sidebar:
    st.header("Research Topic")
    user_topic = st.text_input("Enter a topic to research:")

    # The start button stays disabled while the text box is empty
    start_button = st.button(
        "Start Research",
        type="primary",
        disabled=not user_topic,
    )

    st.divider()
    st.subheader("Example Topics")
    example_topics = [
        "What are the best cruise lines in USA for first-time travelers who have never been on a cruise?",
        "What are the best affordable espresso machines for someone upgrading from a French press?",
        "What are the best off-the-beaten-path destinations in India for a first-time solo traveler?",
    ]

    # Clicking an example both selects it as the topic and starts the research
    for example_topic in example_topics:
        if not st.button(example_topic):
            continue
        user_topic = example_topic
        start_button = True
# Main content area with two tabs
# Two tabs: live progress on the left, the finished report on the right
tab1, tab2 = st.tabs(["Research Process", "Report"])

# Identifier used to group the traces produced by this browser session
if "conversation_id" not in st.session_state:
    st.session_state.conversation_id = uuid.uuid4().hex[:16]

# Remaining session keys, created only when missing so reruns keep their values
for state_key, initial_value in (
    ("collected_facts", []),
    ("research_done", False),
    ("report_result", None),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
# Main research function
def _plan_for_display(final_output, topic):
    """Return the research plan as a plain dict that ``st.json`` can render."""
    if hasattr(final_output, 'topic'):
        return {
            "topic": final_output.topic,
            "search_queries": final_output.search_queries,
            "focus_areas": final_output.focus_areas,
        }
    # Fallback if we don't get the expected output type
    return {
        "topic": topic,
        "search_queries": ["Researching " + topic],
        "focus_areas": ["General information about " + topic],
    }


def _report_preview(final_output, limit=300):
    """Return the first ``limit`` characters of the report, with "..." only if cut."""
    text = final_output.report if hasattr(final_output, 'report') else str(final_output)
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def _raw_content_from(run_result):
    """Join the non-empty ``content`` of a run's ``new_items`` ('' if there is none)."""
    items = getattr(run_result, 'new_items', None) or []
    return "\n\n".join(
        str(item.content) for item in items if getattr(item, 'content', None)
    )


async def run_research(topic):
    """Run the research workflow for ``topic`` and store the outcome in session state.

    Progress messages are written to the "Research Process" tab (``tab1``). On
    return, ``st.session_state.research_done`` is True and
    ``st.session_state.report_result`` holds the editor agent's output, the raw
    fallback text, or None when neither could be produced.

    Args:
        topic: The research question entered or picked by the user.

    Raises:
        Exception: Anything raised by the triage run propagates to the caller;
            failures of the editor run are caught and reported in the UI.
    """
    # Reset state for new research
    st.session_state.collected_facts = []
    st.session_state.research_done = False
    st.session_state.report_result = None

    with tab1:
        message_container = st.container()

    # Create a trace for the entire workflow
    with trace("News Research", group_id=st.session_state.conversation_id):
        # Start with the triage agent
        with message_container:
            st.write("🔍 **Triage Agent**: Planning research approach...")

        triage_result = await Runner.run(
            triage_agent,
            f"Research this topic thoroughly: {topic}. This research will be used to create a comprehensive research report."
        )

        with message_container:
            st.write("📋 **Research Plan**:")
            st.json(_plan_for_display(triage_result.final_output, topic))

        # The awaited run above has already finished, and nothing in this
        # function adds facts afterwards, so the list is rendered once rather
        # than polled with sleeps.
        collected = st.session_state.collected_facts
        if collected:
            with message_container:
                st.write("📚 **Collected Facts**:")
                for fact in collected:
                    st.info(f"**Fact**: {fact['fact']}\n\n**Source**: {fact['source']}")

        # Editor Agent phase
        with message_container:
            st.write("📝 **Editor Agent**: Creating comprehensive research report...")

        try:
            report_result = await Runner.run(
                editor_agent,
                triage_result.to_input_list()
            )
            st.session_state.report_result = report_result.final_output
        except Exception as e:
            st.error(f"Error generating report: {str(e)}")
            # Fall back to whatever textual content the triage run produced.
            # Only announce raw results when there is something to show.
            raw_content = _raw_content_from(triage_result)
            if raw_content:
                st.session_state.report_result = raw_content
                with message_container:
                    st.write("⚠️ **Research completed but there was an issue generating the structured report.**")
                    st.write("Raw research results are available in the Report tab.")
        else:
            with message_container:
                st.write("✅ **Research Complete! Report Generated.**")
                st.write("📄 **Report Preview**:")
                st.markdown(_report_preview(report_result.final_output))
                st.write("*See the Report tab for the full document.*")

    st.session_state.research_done = True
# Run the research when the button is clicked
# Kick off the workflow for the chosen topic; any failure becomes a visible
# error plus a placeholder report so the Report tab is never left empty
if start_button:
    with st.spinner(f"Researching: {user_topic}"):
        try:
            asyncio.run(run_research(user_topic))
        except Exception as exc:
            error_text = str(exc)
            st.error(f"An error occurred during research: {error_text}")
            # Set a basic report result so the user gets something
            st.session_state.report_result = (
                f"# Research on {user_topic}\n\n"
                "Unfortunately, an error occurred during the research process. "
                "Please try again later or with a different topic.\n\n"
                f"Error details: {error_text}"
            )
            st.session_state.research_done = True
# Display results in the Report tab
# Report tab: render the stored result, which is either a structured
# ResearchReport-like object or plain text (raw fallback / error placeholder)
with tab2:
    if st.session_state.research_done and st.session_state.report_result:
        report = st.session_state.report_result

        if hasattr(report, 'title'):
            # We have a properly structured ResearchReport object
            title = report.title
            # Show the title here too, as the plain-text branch below does
            st.title(f"{title}")

            # Display outline if available
            if hasattr(report, 'outline') and report.outline:
                with st.expander("Report Outline", expanded=True):
                    for number, section in enumerate(report.outline, start=1):
                        st.markdown(f"{number}. {section}")

            # Display word count if available
            if hasattr(report, 'word_count'):
                st.info(f"Word Count: {report.word_count}")

            # Display the full report in markdown
            report_content = report.report if hasattr(report, 'report') else str(report)
            st.markdown(report_content)

            # Display sources if available
            if hasattr(report, 'sources') and report.sources:
                with st.expander("Sources"):
                    for number, source in enumerate(report.sources, start=1):
                        st.markdown(f"{number}. {source}")
        else:
            # Handle string or other type of response
            report_content = str(report)
            title = user_topic.title()
            st.title(f"{title}")
            st.markdown(report_content)

        # Build a file name from the title: runs of characters other than
        # letters, digits, "_" and "-" collapse to "_" (titles may contain
        # "?", "/" and similar), and an empty result falls back to "report".
        download_name = re.sub(r"[^\w-]+", "_", str(title)).strip("_") or "report"

        # Add download button for the report
        st.download_button(
            label="Download Report",
            data=report_content,
            file_name=f"{download_name}.md",
            mime="text/markdown",
        )