Skip to content

[feature] support brave search api and refactor google serper api in BingBrowser #233

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 26, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
209 changes: 209 additions & 0 deletions lagent/actions/bing_browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,215 @@ def _parse_response(self, response: dict) -> dict:
return self._filter_results(raw_results)


class BraveSearch(BaseSearch):
    """
    Wrapper around the Brave Search API.

    To use, you should pass your Brave Search API key to the constructor.

    Args:
        api_key (str): API KEY to use Brave Search API.
            You can create a free API key at https://api.search.brave.com/app/keys.
        region (str): The country code string. Specifies the country where the
            search results come from.
        language (str): The language code string. Specifies the preferred
            language for the search results.
        extra_snippests (bool): Allows retrieving up to 5 additional snippets,
            which are alternative excerpts from the search results.
            NOTE: the misspelled parameter name is kept for backward
            compatibility with existing keyword callers.
        topk (int): The number of search results returned in response from API
            search results.
        black_list (List[str], optional): Hostnames filtered out of the
            results. Defaults to a small list of video/academic sites.
        **kwargs: Any other parameters related to the Brave Search API, e.g.
            ``search_type`` (API supports ['web', 'news', 'images', 'videos'],
            currently only 'web' and 'news' are parsed) and ``proxy`` (passed
            to ``requests``). Find more details at
            https://api.search.brave.com/app/documentation/web-search/get-started.
    """

    def __init__(self,
                 api_key: str,
                 region: str = 'ALL',
                 language: str = 'zh-hans',
                 extra_snippests: bool = True,
                 topk: int = 3,
                 black_list: List[str] = None,
                 **kwargs):
        # Avoid a shared mutable default argument: build the default list
        # per-instance instead of at function-definition time.
        if black_list is None:
            black_list = [
                'enoN',
                'youtube.com',
                'bilibili.com',
                'researchgate.net',
            ]
        self.api_key = api_key
        self.market = region
        self.proxy = kwargs.get('proxy')
        self.language = language
        self.extra_snippests = extra_snippests
        self.search_type = kwargs.get('search_type', 'web')
        self.kwargs = kwargs
        super().__init__(topk, black_list)

    @cached(cache=TTLCache(maxsize=100, ttl=600))
    def search(self, query: str, max_retry: int = 3) -> dict:
        """Search Brave for ``query``, retrying up to ``max_retry`` times.

        Returns the filtered results dict produced by ``_filter_results``;
        raises ``Exception`` after all retries fail.
        """
        for attempt in range(max_retry):
            try:
                response = self._call_brave_api(query)
                return self._parse_response(response)
            except Exception as e:
                logging.exception(str(e))
                warnings.warn(
                    f'Retry {attempt + 1}/{max_retry} due to error: {e}')
                # Randomized back-off between retries.
                time.sleep(random.randint(2, 5))
        raise Exception(
            'Failed to get search results from Brave Search after retries.')

    def _call_brave_api(self, query: str) -> dict:
        """Issue one GET request to the Brave Search API and return the JSON body."""
        endpoint = f'https://api.search.brave.com/res/v1/{self.search_type}/search'
        params = {
            'q': query,
            'country': self.market,
            'search_lang': self.language,
            'extra_snippets': self.extra_snippests,
            'count': self.topk,
            # Forward extra API parameters, but keep client-side options
            # ('proxy', 'search_type') out of the query string — they are not
            # Brave API parameters and would otherwise leak into the request.
            **{
                key: value
                for key, value in self.kwargs.items()
                if value is not None and key not in ('proxy', 'search_type')
            },
        }
        headers = {
            'X-Subscription-Token': self.api_key or '',
            'Accept': 'application/json'
        }
        response = requests.get(
            endpoint, headers=headers, params=params, proxies=self.proxy)
        response.raise_for_status()
        return response.json()

    def _parse_response(self, response: dict) -> dict:
        """Convert a Brave API JSON response into filtered (url, snippet, title) results."""
        if self.search_type == 'web':
            filtered_result = response.get('web', {}).get('results', [])
        else:
            # Non-'web' searches (e.g. 'news') return results at the top level.
            # Fall back to an empty list (not a dict) when absent.
            filtered_result = response.get('results', [])
        raw_results = []

        for item in filtered_result:
            raw_results.append((
                item.get('url', ''),
                # Join the description with any extra snippets, dropping
                # missing/None pieces.
                ' '.join(
                    filter(None, [
                        item.get('description'),
                        *item.get('extra_snippets', [])
                    ])),
                item.get('title', ''),
            ))
        return self._filter_results(raw_results)


class GoogleSearch(BaseSearch):
    """
    Wrapper around the Serper.dev Google Search API.

    To use, you should pass your serper API key to the constructor.

    Args:
        api_key (str): API KEY to use serper google search API.
            You can create a free API key at https://serper.dev.
        topk (int): The number of search results returned in response from api
            search results.
        black_list (List[str], optional): Hostnames filtered out of the
            results. Defaults to a small list of video/academic sites.
        **kwargs: Any other parameters related to the Serper API, e.g.
            ``search_type`` (Serper supports ['search', 'images', 'news',
            'places'], currently only 'search' and 'news' are parsed) and
            ``proxy`` (passed to ``requests``). Find more details at
            https://serper.dev/playground
    """

    # Maps a search_type to the key holding its result list in the response.
    result_key_for_type = {
        'news': 'news',
        'places': 'places',
        'images': 'images',
        'search': 'organic',
    }

    def __init__(self,
                 api_key: str,
                 topk: int = 3,
                 black_list: List[str] = None,
                 **kwargs):
        # Avoid a shared mutable default argument: build the default list
        # per-instance instead of at function-definition time.
        if black_list is None:
            black_list = [
                'enoN',
                'youtube.com',
                'bilibili.com',
                'researchgate.net',
            ]
        self.api_key = api_key
        self.proxy = kwargs.get('proxy')
        self.search_type = kwargs.get('search_type', 'search')
        self.kwargs = kwargs
        super().__init__(topk, black_list)

    @cached(cache=TTLCache(maxsize=100, ttl=600))
    def search(self, query: str, max_retry: int = 3) -> dict:
        """Search Google via Serper for ``query``, retrying up to ``max_retry`` times.

        Returns the filtered results dict produced by ``_filter_results``;
        raises ``Exception`` after all retries fail.
        """
        for attempt in range(max_retry):
            try:
                response = self._call_serper_api(query)
                return self._parse_response(response)
            except Exception as e:
                logging.exception(str(e))
                warnings.warn(
                    f'Retry {attempt + 1}/{max_retry} due to error: {e}')
                # Randomized back-off between retries.
                time.sleep(random.randint(2, 5))
        raise Exception(
            'Failed to get search results from Google Serper Search after retries.'
        )

    def _call_serper_api(self, query: str) -> dict:
        """Issue one GET request to the Serper API and return the JSON body."""
        endpoint = f'https://google.serper.dev/{self.search_type}'
        params = {
            'q': query,
            'num': self.topk,
            # Forward extra API parameters, but keep client-side options
            # ('proxy', 'search_type') out of the query string — they are not
            # Serper API parameters and would otherwise leak into the request.
            **{
                key: value
                for key, value in self.kwargs.items()
                if value is not None and key not in ('proxy', 'search_type')
            },
        }
        headers = {
            'X-API-KEY': self.api_key or '',
            'Content-Type': 'application/json'
        }
        response = requests.get(
            endpoint, headers=headers, params=params, proxies=self.proxy)
        response.raise_for_status()
        return response.json()

    def _parse_response(self, response: dict) -> dict:
        """Convert a Serper JSON response into filtered (url, snippet, title) results.

        Includes the answer box and knowledge graph (when present) ahead of
        the regular result list.
        """
        raw_results = []

        if response.get('answerBox'):
            answer_box = response.get('answerBox', {})
            # Prefer a direct answer, then a snippet, then highlighted text.
            if answer_box.get('answer'):
                raw_results.append(('', answer_box.get('answer'), ''))
            elif answer_box.get('snippet'):
                raw_results.append(
                    ('', answer_box.get('snippet').replace('\n', ' '), ''))
            elif answer_box.get('snippetHighlighted'):
                raw_results.append(
                    ('', answer_box.get('snippetHighlighted'), ''))

        if response.get('knowledgeGraph'):
            kg = response.get('knowledgeGraph', {})
            description = kg.get('description', '')
            attributes = '. '.join(
                f'{attribute}: {value}'
                for attribute, value in kg.get('attributes', {}).items())
            raw_results.append(
                (kg.get('descriptionLink', ''),
                 f'{description}. {attributes}' if attributes else description,
                 f"{kg.get('title', '')}: {kg.get('type', '')}."))

        # Use .get with an empty-list fallback so a response without the
        # expected result key does not raise KeyError.
        for result in response.get(
                self.result_key_for_type[self.search_type], [])[:self.topk]:
            description = result.get('snippet', '')
            attributes = '. '.join(
                f'{attribute}: {value}'
                for attribute, value in result.get('attributes', {}).items())
            raw_results.append(
                (result.get('link', ''),
                 f'{description}. {attributes}' if attributes else description,
                 result.get('title', '')))

        return self._filter_results(raw_results)


class ContentFetcher:

def __init__(self, timeout: int = 5):
Expand Down