Skip to content

Commit 604dfe2

Browse files
authored
community[patch]: Force opt-in for WebResearchRetriever (CVE-2024-3095) (#24451)
This PR addresses the issue raised by CVE-2024-3095 (https://huntr.com/bounties/e62d4895-2901-405b-9559-38276b6a5273). Unfortunately, we didn't do a good job writing the initial report: it points at both the wrong package and the wrong code. The affected code is the WebResearchRetriever, not the AsyncHTMLLoader, and the WebResearchRetriever lives in langchain-community. The vulnerable code lives here: https://github.com/langchain-ai/langchain/blob/0bd3f4e1292c085f22bef1fff16059851e11d042/libs/community/langchain_community/retrievers/web_research.py#L233-L233. This PR adds a forced opt-in so that users are aware of the risk and can mitigate it by configuring a proxy: https://github.com/langchain-ai/langchain/blob/0bd3f4e1292c085f22bef1fff16059851e11d042/libs/community/langchain_community/retrievers/web_research.py#L84-L84
1 parent f101c75 commit 604dfe2

File tree

1 file changed

+30
-1
lines changed

1 file changed

+30
-1
lines changed

libs/community/langchain_community/retrievers/web_research.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import logging
22
import re
3-
from typing import List, Optional
3+
from typing import Any, List, Optional
44

55
from langchain.chains import LLMChain
66
from langchain.chains.prompt_selector import ConditionalPromptSelector
@@ -81,6 +81,35 @@ class WebResearchRetriever(BaseRetriever):
8181
"check .netrc for proxy configuration",
8282
)
8383

84+
allow_dangerous_requests: bool = False
85+
"""A flag to force users to acknowledge the risks of SSRF attacks when using
86+
this retriever.
87+
88+
Users should set this flag to `True` if they have taken the necessary precautions
89+
to prevent SSRF attacks when using this retriever.
90+
91+
For example, users can run the requests through a properly configured
92+
proxy and prevent the crawler from accidentally crawling internal resources.
93+
"""
94+
95+
def __init__(self, **kwargs: Any) -> None:
    """Initialize the retriever, requiring an explicit SSRF-risk opt-in.

    Args:
        **kwargs: Field values forwarded unchanged to the parent
            initializer. Must include a truthy ``allow_dangerous_requests``.

    Raises:
        ValueError: If ``allow_dangerous_requests`` is missing or falsy.
    """
    # The check runs before delegating to the parent initializer, so the
    # instance is never set up unless the caller has opted in.
    if not kwargs.get("allow_dangerous_requests", False):
        # Adjacent string literals are concatenated with no separator, so
        # every fragment that precedes another must end with a space.
        raise ValueError(
            "WebResearchRetriever crawls URLs surfaced through "
            "the provided search engine. It is possible that some of those URLs "
            "will end up pointing to machines residing on an internal network, "
            "leading to an SSRF (Server-Side Request Forgery) attack. "
            "To protect yourself against that risk, you can run the requests "
            "through a proxy and prevent the crawler from accidentally crawling "
            "internal resources. "
            "If you've taken the necessary precautions, you can set "
            "`allow_dangerous_requests` to `True`."
        )
    super().__init__(**kwargs)
112+
84113
@classmethod
85114
def from_llm(
86115
cls,

0 commit comments

Comments
 (0)