LangGraph
Integrating Judgeval with LangGraph allows for detailed tracing and evaluation of your graph workflows.
By adding the `JudgevalCallbackHandler` to your LangGraph invocation, you can automatically trace node executions, tool calls, and LLM interactions within your graph using Judgeval.
Judgeval Callback Handler
Judgeval provides the `JudgevalCallbackHandler` for LangGraph integration, which works with both synchronous (`graph.invoke`) and asynchronous (`graph.ainvoke`) LangGraph workflows.
# Create a Tracer bound to your Judgeval project, then wrap it in the callback
# handler that LangGraph invokes for node, tool, and LLM events.
from judgeval.common.tracer import Tracer
from judgeval.integrations.langgraph import JudgevalCallbackHandler
judgment = Tracer(project_name="default_project")
handler = JudgevalCallbackHandler(judgment)
Example Integrations
from typing import TypedDict, Sequence

from langchain_core.messages import HumanMessage
from langgraph.graph import StateGraph, END

from judgeval.common.tracer import Tracer
from judgeval.integrations.langgraph import JudgevalCallbackHandler


class State(TypedDict):
    """Graph state carried between nodes: the conversation messages."""

    messages: Sequence[HumanMessage]
    # ...


judgment = Tracer(project_name="default_project")
handler = JudgevalCallbackHandler(judgment)


def node_1(state: State):
    # ... node logic ...
    # Optionally add evaluation here using judgment.async_evaluate(...)
    return state


def node_2(state: State):
    # ... node logic ...
    return state


# Wire up a simple two-node linear graph: node_1 -> node_2 -> END.
builder = StateGraph(State)
builder.add_node("node_1", node_1)
builder.add_node("node_2", node_2)
builder.set_entry_point("node_1")
builder.add_edge("node_1", "node_2")
builder.add_edge("node_2", END)
graph = builder.compile()


def run_graph():
    """Invoke the graph once, passing the Judgeval handler via callbacks."""
    state_in = {"messages": [HumanMessage(content="Hello!")]}
    # The handler records what ran; inspect it after the invocation.
    final_state = graph.invoke(state_in, config={"callbacks": [handler]})
    print("Executed Nodes:", handler.executed_nodes)
    print("Executed Tools:", handler.executed_tools)
    print("Node/Tool Flow:", handler.executed_node_tools)
    print("Final State:", final_state)


if __name__ == "__main__":
    run_graph()
import asyncio
from typing import TypedDict, Sequence

from langchain_core.messages import HumanMessage
from langgraph.graph import StateGraph, END

from judgeval.common.tracer import Tracer
from judgeval.integrations.langgraph import JudgevalCallbackHandler


class State(TypedDict):
    """Graph state carried between nodes: the conversation messages."""

    messages: Sequence[HumanMessage]
    # ...


judgment = Tracer(project_name="default_project")
handler = JudgevalCallbackHandler(judgment)


async def node_1(state: State):
    # ... node logic ...
    # Optionally add evaluation here using judgment.async_evaluate(...)
    return state


async def node_2(state: State):
    # ... node logic ...
    return state


# Wire up a simple two-node linear graph: node_1 -> node_2 -> END.
builder = StateGraph(State)
builder.add_node("node_1", node_1)
builder.add_node("node_2", node_2)
builder.set_entry_point("node_1")
builder.add_edge("node_1", "node_2")
builder.add_edge("node_2", END)
graph = builder.compile()


async def run_graph():
    """Invoke the graph asynchronously with the Judgeval handler attached."""
    state_in = {"messages": [HumanMessage(content="Hello!")]}
    # The handler records what ran; inspect it after the invocation.
    final_state = await graph.ainvoke(state_in, config={"callbacks": [handler]})
    print("Executed Nodes:", handler.executed_nodes)
    print("Executed Tools:", handler.executed_tools)
    print("Node/Tool Flow:", handler.executed_node_tools)
    print("Final State:", final_state)


if __name__ == "__main__":
    asyncio.run(run_graph())
Triggering Evaluations
You can trigger Judgeval evaluations directly from within your graph nodes using Tracer.async_evaluate().
This associates the evaluation results with the specific node's execution span in the trace. For asynchronous LangGraph workflows, you must set `trace_across_async_contexts=True` when initializing the tracer so that `async_evaluate` captures the evaluation results properly.
from typing import TypedDict, Sequence

from langchain_core.messages import HumanMessage
from langgraph.graph import StateGraph, END

from judgeval.common.tracer import Tracer
from judgeval.data import Example  # fix: Example was used below but never imported
from judgeval.integrations.langgraph import JudgevalCallbackHandler
from judgeval.scorers import AnswerRelevancyScorer  # Or other scorers


class State(TypedDict):
    """Graph state carried between nodes: the conversation messages."""

    messages: Sequence[HumanMessage]
    # ...


judgment = Tracer(project_name="default_project")
handler = JudgevalCallbackHandler(judgment)


def my_node_function(state: State) -> State:
    """Example node that triggers a Judgeval evaluation from inside the graph.

    The async_evaluate call ties the evaluation result to this node's
    execution span in the trace.
    """
    # ... your node logic ...
    user_input = "some input"
    llm_output = "some output"
    model_name = "gpt-4"
    judgment.async_evaluate(
        scorer=AnswerRelevancyScorer(threshold=0.7),
        example=Example(input=user_input, actual_output=llm_output),
        model=model_name,
    )
    # ... potentially modify state further ...
    return state


graph_builder = StateGraph(State)
graph_builder.add_node("my_node_function", my_node_function)
graph_builder.set_entry_point("my_node_function")
graph_builder.add_edge("my_node_function", END)
graph = graph_builder.compile()


def run_graph():
    """Invoke the graph once with the Judgeval callback handler attached."""
    initial_state = {"messages": [HumanMessage(content="Hello!")]}
    config_with_callbacks = {"callbacks": [handler]}
    graph.invoke(initial_state, config=config_with_callbacks)


if __name__ == "__main__":
    run_graph()
import asyncio
from typing import TypedDict, Sequence

from langchain_core.messages import HumanMessage
from langgraph.graph import StateGraph, END

from judgeval.common.tracer import Tracer
from judgeval.data import Example  # fix: Example was used below but never imported
from judgeval.integrations.langgraph import JudgevalCallbackHandler
from judgeval.scorers import AnswerRelevancyScorer  # Or other scorers


class State(TypedDict):
    """Graph state carried between nodes: the conversation messages."""

    messages: Sequence[HumanMessage]
    # ...


# trace_across_async_contexts=True is required so evaluations fired from
# async nodes are captured in the correct trace.
judgment = Tracer(project_name="default_project", trace_across_async_contexts=True)
handler = JudgevalCallbackHandler(judgment)


async def my_async_node_function(state: State) -> State:
    """Example async node that triggers a Judgeval evaluation from inside the graph.

    The async_evaluate call ties the evaluation result to this node's
    execution span in the trace.
    """
    # ... your node logic ...
    user_input = "some input"
    llm_output = "some output"
    model_name = "gpt-4"
    judgment.async_evaluate(
        scorer=AnswerRelevancyScorer(threshold=0.7),
        example=Example(input=user_input, actual_output=llm_output),
        model=model_name,
    )
    # ... potentially modify state further ...
    return state


graph_builder = StateGraph(State)
graph_builder.add_node("my_async_node_function", my_async_node_function)
graph_builder.set_entry_point("my_async_node_function")
graph_builder.add_edge("my_async_node_function", END)
graph = graph_builder.compile()


async def run_graph():
    """Invoke the graph asynchronously with the Judgeval callback handler attached."""
    initial_state = {"messages": [HumanMessage(content="Hello!")]}
    config_with_callbacks = {"callbacks": [handler]}
    await graph.ainvoke(initial_state, config=config_with_callbacks)


if __name__ == "__main__":
    asyncio.run(run_graph())