Agent Run

An AgentRun represents one complete execution of an agent. It contains a collection of Transcript objects, as well as metadata (scores, experiment info, etc.).

  • In single-agent settings (the most common case), each AgentRun contains a single Transcript.
  • In multi-agent settings, an AgentRun may contain multiple Transcript objects. For example, in a two-agent debate setting, you'll have one Transcript per agent in the same AgentRun.
  • Docent's LLM search features operate over complete AgentRun objects. Runs are passed to LLMs in their .text form.

Usage

AgentRun objects require a dictionary mapping transcript IDs to Transcript objects, as well as a BaseAgentRunMetadata object. At minimum, the metadata must specify scores.

from docent.data_models import AgentRun, Transcript, BaseAgentRunMetadata
from docent.data_models.chat import UserMessage, AssistantMessage

transcripts = {
    "default": Transcript(
        messages=[
            UserMessage(content="Hello, what's 1 + 1?"),
            AssistantMessage(content="2"),
        ]
    )
}

agent_run = AgentRun(
    transcripts=transcripts,
    metadata=BaseAgentRunMetadata(
        scores={"correct": True, "reward": 1.0},
    )
)
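
In a multi-agent setting, such as the debate example mentioned above, you can pass one Transcript per agent. The transcript keys below ("debater_a", "debater_b") and the score name are illustrative; any string keys work.

debate_run = AgentRun(
    transcripts={
        "debater_a": Transcript(
            messages=[
                UserMessage(content="Argue that 1 + 1 = 2."),
                AssistantMessage(content="By the definition of addition, 1 + 1 = 2."),
            ]
        ),
        "debater_b": Transcript(
            messages=[
                UserMessage(content="Argue against the claim that 1 + 1 = 2."),
                AssistantMessage(content="In arithmetic mod 2, 1 + 1 = 0."),
            ]
        ),
    },
    metadata=BaseAgentRunMetadata(
        scores={"winner_is_a": True},
    ),
)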

If you want to add additional fields to your metadata, subclass BaseAgentRunMetadata, as sketched below.
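
A minimal sketch of such a subclass, assuming BaseAgentRunMetadata behaves like a standard Pydantic model (the extra field names here are hypothetical):

from docent.data_models import AgentRun, BaseAgentRunMetadata

class ExperimentMetadata(BaseAgentRunMetadata):
    # Hypothetical extra fields on top of the required `scores`
    model_name: str
    task_id: str

agent_run = AgentRun(
    transcripts=transcripts,
    metadata=ExperimentMetadata(
        scores={"correct": True, "reward": 1.0},
        model_name="example-model",
        task_id="task-001",
    ),
)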

Rendering

To see how your AgentRun will be rendered for an LLM, print agent_run.text. This is useful for validating that your metadata is being included properly.
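
For example:

# Prints the transcripts wrapped in <transcript ...> tags, followed by a
# YAML-formatted <agent run metadata> block
print(agent_run.text)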

docent.data_models.agent_run

AgentRun

Bases: BaseModel

Represents a complete run of an agent with transcripts and metadata.

An AgentRun encapsulates the execution of an agent, storing all communication transcripts and associated metadata. It must contain at least one transcript.

Attributes:

  • id (str): Unique identifier for the agent run, auto-generated by default.
  • name (str | None): Optional human-readable name for the agent run.
  • description (str | None): Optional description of the agent run.
  • transcripts (dict[str, Transcript]): Dict mapping transcript IDs to Transcript objects.
  • metadata (BaseAgentRunMetadata): Additional structured metadata about the agent run.

Source code in docent/data_models/agent_run.py
class AgentRun(BaseModel):
    """Represents a complete run of an agent with transcripts and metadata.

    An AgentRun encapsulates the execution of an agent, storing all communication
    transcripts and associated metadata. It must contain at least one transcript.

    Attributes:
        id: Unique identifier for the agent run, auto-generated by default.
        name: Optional human-readable name for the agent run.
        description: Optional description of the agent run.
        transcripts: Dict mapping transcript IDs to Transcript objects.
        metadata: Additional structured metadata about the agent run.
    """

    id: str = Field(default_factory=lambda: str(uuid4()))
    name: str | None = None
    description: str | None = None

    transcripts: dict[str, Transcript]
    metadata: BaseAgentRunMetadata

    @field_serializer("metadata")
    def serialize_metadata(self, metadata: BaseAgentRunMetadata, _info: Any) -> dict[str, Any]:
        """
        Custom serializer for the metadata field so the internal fields are explicitly preserved.
        """
        return metadata.model_dump(strip_internal_fields=False)

    @field_validator("metadata", mode="before")
    @classmethod
    def _validate_metadata_type(cls, v: Any) -> Any:
        if v is not None and not isinstance(v, BaseAgentRunMetadata):
            raise ValueError(
                f"metadata must be an instance of BaseAgentRunMetadata, got {type(v).__name__}"
            )
        return v

    @model_validator(mode="after")
    def _validate_transcripts_not_empty(self):
        """Validates that the agent run contains at least one transcript.

        Raises:
            ValueError: If the transcripts list is empty.

        Returns:
            AgentRun: The validated AgentRun instance.
        """
        if len(self.transcripts) == 0:
            raise ValueError("AgentRun must have at least one transcript")
        return self

    def to_text(self, token_limit: int = sys.maxsize) -> list[str]:
        """
        Represents an agent run as a list of strings, each of which is at most token_limit tokens
        under the GPT-4 tokenization scheme.

        We'll try to split up long AgentRuns along transcript boundaries and include metadata.
        For very long transcripts, we'll have to split them up further and remove metadata.
        """

        transcript_strs: list[str] = [
            f"<transcript {t_key}>\n{t.to_str(agent_run_idx=None, transcript_idx=i)}\n</transcript {t_key}>"
            for i, (t_key, t) in enumerate(self.transcripts.items())
        ]
        transcripts_str = "\n\n".join(transcript_strs)

        # Gather metadata
        metadata_obj = self.metadata.model_dump(strip_internal_fields=True)
        if self.name is not None:
            metadata_obj["name"] = self.name
        if self.description is not None:
            metadata_obj["description"] = self.description
        # Add the field descriptions if they exist
        metadata_obj = {
            (f"{k} ({d})" if (d := self.metadata.get_field_description(k)) is not None else k): v
            for k, v in metadata_obj.items()
        }

        yaml_width = float("inf")
        transcripts_str = (
            f"Here is a complete agent run for analysis purposes only:\n{transcripts_str}\n\n"
        )
        metadata_str = f"Metadata about the complete agent run:\n<agent run metadata>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</agent run metadata>"

        if token_limit == sys.maxsize:
            return [f"{transcripts_str}" f"{metadata_str}"]

        # Compute message length; if fits, return the full transcript and metadata
        transcript_str_tokens = get_token_count(transcripts_str)
        metadata_str_tokens = get_token_count(metadata_str)
        if transcript_str_tokens + metadata_str_tokens <= token_limit:
            return [f"{transcripts_str}" f"{metadata_str}"]

        # Otherwise, split up the transcript and metadata into chunks
        # TODO(vincent, mengk): does this code account for multiple transcripts correctly? a little confused.
        else:
            results: list[str] = []
            transcript_token_counts = [get_token_count(t) for t in transcript_strs]
            ranges = group_messages_into_ranges(
                transcript_token_counts, metadata_str_tokens, token_limit - 50
            )
            for msg_range in ranges:
                if msg_range.include_metadata:
                    cur_transcript_str = "\n\n".join(
                        transcript_strs[msg_range.start : msg_range.end]
                    )
                    results.append(
                        f"Here is a partial agent run for analysis purposes only:\n{cur_transcript_str}"
                        f"{metadata_str}"
                    )
                else:
                    assert (
                        msg_range.end == msg_range.start + 1
                    ), "Ranges without metadata should be a single message"
                    t_id, t = list(self.transcripts.items())[msg_range.start]
                    if msg_range.num_tokens < token_limit - 50:
                        transcript = f"<transcript {t_id}>\n{t.to_str()}\n</transcript {t_id}>"
                        result = (
                            f"Here is a partial agent run for analysis purposes only:\n{transcript}"
                        )
                        results.append(result)
                    else:
                        transcript_fragments = t.to_str_with_token_limit(token_limit - 50)
                        for fragment in transcript_fragments:
                            result = f"<transcript {t_id}>\n{fragment}\n</transcript {t_id}>"
                            result = (
                                f"Here is a partial agent run for analysis purposes only:\n{result}"
                            )
                            results.append(result)
            return results

    @property
    def text(self) -> str:
        """Concatenates all transcript texts with double newlines as separators.

        Returns:
            str: A string representation of all transcripts.
        """
        return self.to_text()[0]

    def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
        """Extends the parent model_dump method to include the text property.

        Args:
            *args: Variable length argument list passed to parent method.
            **kwargs: Arbitrary keyword arguments passed to parent method.

        Returns:
            dict[str, Any]: Dictionary representation of the model including the text property.
        """
        return super().model_dump(*args, **kwargs) | {"text": self.text}

    def get_filterable_fields(self, max_depth: int = 1) -> list[FilterableField]:
        """Returns a list of all fields that can be used to filter the agent run,
        by recursively exploring the model_dump() for singleton types in dictionaries.

        Returns:
            list[FilterableField]: A list of filterable fields, where each field is a
                                   dictionary containing its 'name' (path) and 'type'.
        """

        result: list[FilterableField] = []

        def _explore_dict(d: dict[str, Any], prefix: str, depth: int):
            nonlocal result

            if depth > max_depth:
                return

            for k, v in d.items():
                if isinstance(v, (str, int, float, bool)):
                    result.append(
                        {
                            "name": f"{prefix}.{k}",
                            "type": cast(Literal["str", "bool", "int", "float"], type(v).__name__),
                        }
                    )
                elif isinstance(v, dict):
                    _explore_dict(cast(dict[str, Any], v), f"{prefix}.{k}", depth + 1)

        # Look at the agent run metadata
        _explore_dict(self.metadata.model_dump(strip_internal_fields=True), "metadata", 0)
        # Look at the transcript metadata
        # TODO(mengk): restore this later when we have the ability to integrate with SQL.
        # for t_id, t in self.transcripts.items():
        #     _explore_dict(
        #         t.metadata.model_dump(strip_internal_fields=True), f"transcript.{t_id}.metadata", 0
        #     )

        # Append the text field
        result.append({"name": "text", "type": "str"})

        return result

text property

text: str

Concatenates all transcript texts with double newlines as separators.

Returns:

  • str: A string representation of all transcripts.

serialize_metadata

serialize_metadata(metadata: BaseAgentRunMetadata, _info: Any) -> dict[str, Any]

Custom serializer for the metadata field so the internal fields are explicitly preserved.

Source code in docent/data_models/agent_run.py
@field_serializer("metadata")
def serialize_metadata(self, metadata: BaseAgentRunMetadata, _info: Any) -> dict[str, Any]:
    """
    Custom serializer for the metadata field so the internal fields are explicitly preserved.
    """
    return metadata.model_dump(strip_internal_fields=False)

to_text

to_text(token_limit: int = maxsize) -> list[str]

Represents an agent run as a list of strings, each of which is at most token_limit tokens under the GPT-4 tokenization scheme.

We'll try to split up long AgentRuns along transcript boundaries and include metadata. For very long transcripts, we'll have to split them up further and remove metadata.

Source code in docent/data_models/agent_run.py
def to_text(self, token_limit: int = sys.maxsize) -> list[str]:
    """
    Represents an agent run as a list of strings, each of which is at most token_limit tokens
    under the GPT-4 tokenization scheme.

    We'll try to split up long AgentRuns along transcript boundaries and include metadata.
    For very long transcripts, we'll have to split them up further and remove metadata.
    """

    transcript_strs: list[str] = [
        f"<transcript {t_key}>\n{t.to_str(agent_run_idx=None, transcript_idx=i)}\n</transcript {t_key}>"
        for i, (t_key, t) in enumerate(self.transcripts.items())
    ]
    transcripts_str = "\n\n".join(transcript_strs)

    # Gather metadata
    metadata_obj = self.metadata.model_dump(strip_internal_fields=True)
    if self.name is not None:
        metadata_obj["name"] = self.name
    if self.description is not None:
        metadata_obj["description"] = self.description
    # Add the field descriptions if they exist
    metadata_obj = {
        (f"{k} ({d})" if (d := self.metadata.get_field_description(k)) is not None else k): v
        for k, v in metadata_obj.items()
    }

    yaml_width = float("inf")
    transcripts_str = (
        f"Here is a complete agent run for analysis purposes only:\n{transcripts_str}\n\n"
    )
    metadata_str = f"Metadata about the complete agent run:\n<agent run metadata>\n{yaml.dump(metadata_obj, width=yaml_width)}\n</agent run metadata>"

    if token_limit == sys.maxsize:
        return [f"{transcripts_str}" f"{metadata_str}"]

    # Compute message length; if fits, return the full transcript and metadata
    transcript_str_tokens = get_token_count(transcripts_str)
    metadata_str_tokens = get_token_count(metadata_str)
    if transcript_str_tokens + metadata_str_tokens <= token_limit:
        return [f"{transcripts_str}" f"{metadata_str}"]

    # Otherwise, split up the transcript and metadata into chunks
    # TODO(vincent, mengk): does this code account for multiple transcripts correctly? a little confused.
    else:
        results: list[str] = []
        transcript_token_counts = [get_token_count(t) for t in transcript_strs]
        ranges = group_messages_into_ranges(
            transcript_token_counts, metadata_str_tokens, token_limit - 50
        )
        for msg_range in ranges:
            if msg_range.include_metadata:
                cur_transcript_str = "\n\n".join(
                    transcript_strs[msg_range.start : msg_range.end]
                )
                results.append(
                    f"Here is a partial agent run for analysis purposes only:\n{cur_transcript_str}"
                    f"{metadata_str}"
                )
            else:
                assert (
                    msg_range.end == msg_range.start + 1
                ), "Ranges without metadata should be a single message"
                t_id, t = list(self.transcripts.items())[msg_range.start]
                if msg_range.num_tokens < token_limit - 50:
                    transcript = f"<transcript {t_id}>\n{t.to_str()}\n</transcript {t_id}>"
                    result = (
                        f"Here is a partial agent run for analysis purposes only:\n{transcript}"
                    )
                    results.append(result)
                else:
                    transcript_fragments = t.to_str_with_token_limit(token_limit - 50)
                    for fragment in transcript_fragments:
                        result = f"<transcript {t_id}>\n{fragment}\n</transcript {t_id}>"
                        result = (
                            f"Here is a partial agent run for analysis purposes only:\n{result}"
                        )
                        results.append(result)
        return results
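
For example, to split a long run into LLM-sized chunks (the 8,000-token limit here is arbitrary):

chunks = agent_run.to_text(token_limit=8_000)
print(len(chunks))      # number of chunks produced
print(chunks[0][:200])  # preview of the first chunk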

model_dump

model_dump(*args: Any, **kwargs: Any) -> dict[str, Any]

Extends the parent model_dump method to include the text property.

Parameters:

  • *args (Any): Variable length argument list passed to the parent method. Default: ()
  • **kwargs (Any): Arbitrary keyword arguments passed to the parent method. Default: {}

Returns:

  • dict[str, Any]: Dictionary representation of the model including the text property.

Source code in docent/data_models/agent_run.py
def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]:
    """Extends the parent model_dump method to include the text property.

    Args:
        *args: Variable length argument list passed to parent method.
        **kwargs: Arbitrary keyword arguments passed to parent method.

    Returns:
        dict[str, Any]: Dictionary representation of the model including the text property.
    """
    return super().model_dump(*args, **kwargs) | {"text": self.text}
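
For example, the dumped dictionary carries the rendered text alongside the regular model fields:

dump = agent_run.model_dump()
assert "text" in dump          # added on top of the standard Pydantic fields
assert "transcripts" in dump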

get_filterable_fields

get_filterable_fields(max_depth: int = 1) -> list[FilterableField]

Returns a list of all fields that can be used to filter the agent run, by recursively exploring the model_dump() for singleton types in dictionaries.

Returns:

  • list[FilterableField]: A list of filterable fields, where each field is a dictionary containing its 'name' (path) and 'type'.

Source code in docent/data_models/agent_run.py
def get_filterable_fields(self, max_depth: int = 1) -> list[FilterableField]:
    """Returns a list of all fields that can be used to filter the agent run,
    by recursively exploring the model_dump() for singleton types in dictionaries.

    Returns:
        list[FilterableField]: A list of filterable fields, where each field is a
                               dictionary containing its 'name' (path) and 'type'.
    """

    result: list[FilterableField] = []

    def _explore_dict(d: dict[str, Any], prefix: str, depth: int):
        nonlocal result

        if depth > max_depth:
            return

        for k, v in d.items():
            if isinstance(v, (str, int, float, bool)):
                result.append(
                    {
                        "name": f"{prefix}.{k}",
                        "type": cast(Literal["str", "bool", "int", "float"], type(v).__name__),
                    }
                )
            elif isinstance(v, dict):
                _explore_dict(cast(dict[str, Any], v), f"{prefix}.{k}", depth + 1)

    # Look at the agent run metadata
    _explore_dict(self.metadata.model_dump(strip_internal_fields=True), "metadata", 0)
    # Look at the transcript metadata
    # TODO(mengk): restore this later when we have the ability to integrate with SQL.
    # for t_id, t in self.transcripts.items():
    #     _explore_dict(
    #         t.metadata.model_dump(strip_internal_fields=True), f"transcript.{t_id}.metadata", 0
    #     )

    # Append the text field
    result.append({"name": "text", "type": "str"})

    return result
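
With the example metadata from the Usage section, the result might look like this (exact fields depend on your metadata):

fields = agent_run.get_filterable_fields()
# e.g. [{"name": "metadata.scores.correct", "type": "bool"},
#       {"name": "metadata.scores.reward", "type": "float"},
#       {"name": "text", "type": "str"}]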

AgentRunWithoutMetadataValidator

Bases: AgentRun

A version of AgentRun that doesn't have the model_validator on metadata. Needed for sending/receiving agent runs via JSON, since they incorrectly trip the existing model_validator.

Source code in docent/data_models/agent_run.py
class AgentRunWithoutMetadataValidator(AgentRun):
    """
    A version of AgentRun that doesn't have the model_validator on metadata.
    Needed for sending/receiving agent runs via JSON, since they incorrectly trip the existing model_validator.
    """

    transcripts: dict[str, TranscriptWithoutMetadataValidator]  # type: ignore

    @field_validator("metadata", mode="before")
    @classmethod
    def _validate_metadata_type(cls, v: Any) -> Any:
        # Bypass the model_validator
        return v
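
A hypothetical round-trip through JSON-style serialization, where the relaxed validator accepts the plain-dict metadata produced by model_dump:

payload = agent_run.model_dump()  # metadata is serialized to a plain dict
restored = AgentRunWithoutMetadataValidator.model_validate(payload)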
