LangGraph
Integrating Judgeval with LangGraph allows for detailed tracing and evaluation of your graph workflows. By adding the JudgevalCallbackHandler to your LangGraph invocation, you can automatically trace node executions, tool calls, and LLM interactions within your graph using Judgeval.
Judgeval Callback Handler
Judgeval provides the JudgevalCallbackHandler for LangGraph integration, which works with both synchronous (graph.invoke) and asynchronous (graph.ainvoke) LangGraph workflows.
from judgeval.common.tracer import Tracer
from judgeval.integrations.langgraph import JudgevalCallbackHandler
judgment = Tracer(project_name="my-project")
handler = JudgevalCallbackHandler(judgment)
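Once constructed, the handler is passed to your compiled graph through the standard LangChain callbacks config. A minimal sketch, assuming graph is an already-compiled StateGraph and initial_state matches its state schema:

# Minimal sketch: pass the handler via the LangChain callbacks config.
# Assumes `graph` is a compiled StateGraph and `initial_state` matches its state schema.
config_with_callbacks = {"callbacks": [handler]}

# Synchronous invocation
final_state = graph.invoke(initial_state, config=config_with_callbacks)

# Asynchronous invocation (inside an async function)
# final_state = await graph.ainvoke(initial_state, config=config_with_callbacks)

The complete examples below show the same pattern end to end.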
Example Integrations
# Synchronous example: the handler traces a graph run via graph.invoke
from typing import TypedDict, Sequence
from langchain_core.messages import HumanMessage
from langgraph.graph import StateGraph, END

from judgeval.common.tracer import Tracer
from judgeval.integrations.langgraph import JudgevalCallbackHandler

class State(TypedDict):
    messages: Sequence[HumanMessage]
    # ...

judgment = Tracer(project_name="my-project")
handler = JudgevalCallbackHandler(judgment)

def node_1(state: State):
    # ... node logic ...
    # Optionally add evaluation here using add_evaluation_to_state(state, ...)
    return state

def node_2(state: State):
    # ... node logic ...
    return state

graph_builder = StateGraph(State)
graph_builder.add_node("node_1", node_1)
graph_builder.add_node("node_2", node_2)
graph_builder.set_entry_point("node_1")
graph_builder.add_edge("node_1", "node_2")
graph_builder.add_edge("node_2", END)
graph = graph_builder.compile()

def run_graph():
    initial_state = {"messages": [HumanMessage(content="Hello!")]}
    config_with_callbacks = {"callbacks": [handler]}
    final_state = graph.invoke(initial_state, config=config_with_callbacks)

    print("Executed Nodes:", handler.executed_nodes)
    print("Executed Tools:", handler.executed_tools)
    print("Node/Tool Flow:", handler.executed_node_tools)
    print("Final State:", final_state)

if __name__ == "__main__":
    run_graph()
# Asynchronous example: the same handler also traces runs via graph.ainvoke
import asyncio
from typing import TypedDict, Sequence
from langchain_core.messages import HumanMessage
from langgraph.graph import StateGraph, END

from judgeval.common.tracer import Tracer
from judgeval.integrations.langgraph import JudgevalCallbackHandler

class State(TypedDict):
    messages: Sequence[HumanMessage]
    # ...

judgment = Tracer(project_name="my-project")
handler = JudgevalCallbackHandler(judgment)

async def node_1(state: State):
    # ... node logic ...
    # Optionally add evaluation here using add_evaluation_to_state(state, ...)
    return state

async def node_2(state: State):
    # ... node logic ...
    return state

graph_builder = StateGraph(State)
graph_builder.add_node("node_1", node_1)
graph_builder.add_node("node_2", node_2)
graph_builder.set_entry_point("node_1")
graph_builder.add_edge("node_1", "node_2")
graph_builder.add_edge("node_2", END)
graph = graph_builder.compile()

async def run_graph():
    initial_state = {"messages": [HumanMessage(content="Hello!")]}
    config_with_callbacks = {"callbacks": [handler]}
    final_state = await graph.ainvoke(initial_state, config=config_with_callbacks)

    print("Executed Nodes:", handler.executed_nodes)
    print("Executed Tools:", handler.executed_tools)
    print("Node/Tool Flow:", handler.executed_node_tools)
    print("Final State:", final_state)

if __name__ == "__main__":
    asyncio.run(run_graph())
Triggering Evaluations
You can trigger Judgeval evaluations directly from within your graph nodes.
This associates the evaluation results with the specific node's execution span in the trace.
The recommended way is to use the add_evaluation_to_state helper function:
# Inside a synchronous LangGraph node function
from judgeval.integrations.langgraph import add_evaluation_to_state
from judgeval.scorers import AnswerRelevancyScorer  # Or other scorers

def my_node_function(state: State) -> State:
    # ... your node logic ...
    user_input = "some input"
    llm_output = "some output"
    model_name = "gpt-4"

    add_evaluation_to_state(
        state=state,
        scorers=[AnswerRelevancyScorer(threshold=0.7)],
        input=user_input,
        actual_output=llm_output,
        model=model_name
    )
    # ... potentially modify state further ...
    return state
# Inside an asynchronous LangGraph node function
from judgeval.integrations.langgraph import add_evaluation_to_state
from judgeval.scorers import AnswerRelevancyScorer  # Or other scorers

async def my_async_node_function(state: State) -> State:
    # ... your node logic ...
    user_input = "some input"
    llm_output = "some output"
    model_name = "gpt-4"

    add_evaluation_to_state(
        state=state,
        scorers=[AnswerRelevancyScorer(threshold=0.7)],
        input=user_input,
        actual_output=llm_output,
        model=model_name
    )
    # ... potentially modify state further ...
    return state
Alternatively, you can manually add an EvaluationConfig object to your node's output state under the key _judgeval_eval. The handler will detect this and trigger the evaluation.
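A rough sketch of the manual approach is shown below. It assumes EvaluationConfig is importable from judgeval.integrations.langgraph and that it carries the same information add_evaluation_to_state collects (scorers, the example under evaluation, and a model name); verify the exact import path and constructor fields against the judgeval source.

# Hedged sketch of the manual approach -- the EvaluationConfig import path and
# field names below are assumptions; check the judgeval source before relying on them.
from judgeval.data import Example
from judgeval.integrations.langgraph import EvaluationConfig
from judgeval.scorers import AnswerRelevancyScorer

def my_manual_node(state: State) -> State:
    # ... your node logic ...
    # The handler looks for this key in the node's output state and triggers the evaluation.
    state["_judgeval_eval"] = EvaluationConfig(
        scorers=[AnswerRelevancyScorer(threshold=0.7)],
        example=Example(input="some input", actual_output="some output"),
        model="gpt-4",
    )
    return state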
View some of our demo code for more detailed examples.
Basic Workflow
A simple example demonstrating how to integrate Judgeval with a basic LangGraph workflow
Human in the Loop
An advanced example showing how to implement human-in-the-loop patterns with Judgeval tracing