forked from tsinghua-fib-lab/AgentSocietyChallenge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: ModelingAgent_baseline.py
163 lines (135 loc) · 7.18 KB
/
ModelingAgent_baseline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
from websocietysimulator import Simulator
from websocietysimulator.agent import SimulationAgent
import json
from websocietysimulator.llm import LLMBase, InfinigenceLLM
from websocietysimulator.agent.modules.planning_modules import PlanningBase
from websocietysimulator.agent.modules.reasoning_modules import ReasoningBase
from websocietysimulator.agent.modules.memory_modules import MemoryDILU
import logging
logging.basicConfig(level=logging.INFO)
class PlanningBaseline(PlanningBase):
    """Fixed two-step planner: look up the user, then the business.

    Inherits from PlanningBase but ignores any generated planning and
    always returns the same two-entry task list, parameterized only by
    the ids found in ``task_description``.
    """

    def __init__(self, llm):
        """Initialize the planning module with the shared LLM client."""
        super().__init__(llm=llm)

    def __call__(self, task_description):
        """Build the fixed plan and cache it on ``self.plan``.

        Args:
            task_description: mapping with at least 'user_id' and 'item_id'.

        Returns:
            list[dict]: the two plan steps (user lookup, business lookup).
        """
        user_step = {
            'description': 'First I need to find user information',
            'reasoning instruction': 'None',
            # NOTE(review): set literal (inherited from the baseline), not a
            # dict — presumably harmless since downstream code only reads
            # 'description'; confirm before changing.
            'tool use instruction': {task_description['user_id']},
        }
        business_step = {
            'description': 'Next, I need to find business information',
            'reasoning instruction': 'None',
            'tool use instruction': {task_description['item_id']},
        }
        self.plan = [user_step, business_step]
        return self.plan
class ReasoningBaseline(ReasoningBase):
    """Minimal reasoner: forwards the task description verbatim to the LLM."""

    def __init__(self, profile_type_prompt, llm):
        """Initialize the reasoning module; no memory module is attached."""
        super().__init__(profile_type_prompt=profile_type_prompt, memory=None, llm=llm)

    def __call__(self, task_description: str):
        """Send ``task_description`` as a single user message and return the reply.

        The prompt is the task description preceded by a newline, exactly as
        in the baseline template.
        """
        prompt = '''
{task_description}'''.format(task_description=task_description)
        return self.llm(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.0,
            max_tokens=1000,
        )
class MySimulationAgent(SimulationAgent):
    """Participant's implementation of SimulationAgent.

    Plans two lookups (user profile, business record), loads prior reviews
    of the item into memory, retrieves the review most similar to the
    user's own history, prompts the LLM for a star rating plus review
    text, and parses the reply.
    """

    def __init__(self, llm: LLMBase):
        """Initialize MySimulationAgent with planning/reasoning/memory modules."""
        super().__init__(llm=llm)
        self.planning = PlanningBaseline(llm=self.llm)
        self.reasoning = ReasoningBaseline(profile_type_prompt='', llm=self.llm)
        self.memory = MemoryDILU(llm=self.llm)

    @staticmethod
    def _parse_result(result: str):
        """Extract ``(stars, review_text)`` from the LLM reply.

        Args:
            result: raw LLM output, expected to contain a 'stars:' line
                and a 'review:' line.

        Returns:
            tuple[float, str]: the parsed rating and the review text,
            truncated to 512 characters.

        Raises:
            ValueError: if either expected line is missing.
        """
        lines = result.split('\n')
        try:
            stars_line = next(line for line in lines if 'stars:' in line)
            review_line = next(line for line in lines if 'review:' in line)
        except StopIteration:
            # Previously a bare `except:` printed the reply and fell through
            # to use an unbound variable; fail explicitly instead.
            raise ValueError(f'Malformed LLM reply: {result!r}') from None
        # maxsplit=1 so a ':' inside the review text is not cut off.
        stars = float(stars_line.split(':', 1)[1].strip())
        review_text = review_line.split(':', 1)[1].strip()
        if len(review_text) > 512:
            review_text = review_text[:512]
        return stars, review_text

    def workflow(self):
        """
        Simulate user behavior.

        Returns:
            dict: {"stars": float, "review": str}. On any failure a
            fallback of {"stars": 0, "review": ""} is returned.
        """
        try:
            plan = self.planning(task_description=self.task)
            # Defaults so the prompt is still well-formed even if the plan
            # descriptions unexpectedly match neither branch.
            user = ''
            business = ''
            for sub_task in plan:
                if 'user' in sub_task['description']:
                    user = str(self.interaction_tool.get_user(user_id=self.task['user_id']))
                elif 'business' in sub_task['description']:
                    business = str(self.interaction_tool.get_item(item_id=self.task['item_id']))
            # Store every existing review of the item so the memory module
            # can retrieve the one most similar to this user's style.
            reviews_item = self.interaction_tool.get_reviews(item_id=self.task['item_id'])
            for review in reviews_item:
                review_text = review['text']
                self.memory(f'review: {review_text}')
            reviews_user = self.interaction_tool.get_reviews(user_id=self.task['user_id'])
            review_similar = self.memory(f'{reviews_user[0]["text"]}')
            task_description = f'''
You are a real human user on Yelp, a platform for crowd-sourced business reviews. Here is your Yelp profile and review history: {user}
You need to write a review for this business: {business}
Others have reviewed this business before: {review_similar}
Please analyze the following aspects carefully:
1. Based on your user profile and review style, what rating would you give this business? Remember that many users give 5-star ratings for excellent experiences that exceed expectations, and 1-star ratings for very poor experiences that fail to meet basic standards.
2. Given the business details and your past experiences, what specific aspects would you comment on? Focus on the positive aspects that make this business stand out or negative aspects that severely impact the experience.
3. Consider how other users might engage with your review in terms of:
- Useful: How informative and helpful is your review?
- Funny: Does your review have any humorous or entertaining elements?
- Cool: Is your review particularly insightful or praiseworthy?
Requirements:
- Star rating must be one of: 1.0, 2.0, 3.0, 4.0, 5.0
- If the business meets or exceeds expectations in key areas, consider giving a 5-star rating
- If the business fails significantly in key areas, consider giving a 1-star rating
- Review text should be 2-4 sentences, focusing on your personal experience and emotional response
- Useful/funny/cool counts should be non-negative integers that reflect likely user engagement
- Maintain consistency with your historical review style and rating patterns
- Focus on specific details about the business rather than generic comments
- Be generous with ratings when businesses deliver quality service and products
- Be critical when businesses fail to meet basic standards
Format your response exactly as follows:
stars: [your rating]
review: [your review]
'''
            result = self.reasoning(task_description)
            stars, review_text = self._parse_result(result)
            return {
                "stars": stars,
                "review": review_text
            }
        except Exception as e:
            logging.exception("Error in workflow: %s", e)
            return {
                "stars": 0,
                "review": ""
            }
if __name__ == "__main__":
    # Dataset selection: "amazon", "goodreads" or "yelp".
    task_set = "amazon"

    # Build the simulator and point it at the track-1 tasks/groundtruth.
    simulator = Simulator(data_dir="your data dir", device="gpu", cache=True)
    simulator.set_task_and_groundtruth(
        task_dir=f"./track1/{task_set}/tasks",
        groundtruth_dir=f"./track1/{task_set}/groundtruth",
    )

    # Register the agent implementation and the LLM backend.
    simulator.set_agent(MySimulationAgent)
    simulator.set_llm(InfinigenceLLM(api_key="your api key"))

    # number_of_tasks=None runs every task; work is fanned out over threads.
    outputs = simulator.run_simulation(
        number_of_tasks=None, enable_threading=True, max_workers=10
    )

    # Score the agent and persist the metrics as JSON.
    evaluation_results = simulator.evaluate()
    with open(f'./evaluation_results_track1_{task_set}.json', 'w') as f:
        json.dump(evaluation_results, f, indent=4)

    # Fetch the accumulated evaluation history.
    evaluation_history = simulator.get_evaluation_history()