@langwatch/scenario
    Preparing search index...

    Interface RedTeamAgentConfig

    interface RedTeamAgentConfig {
        attackPlan?: string;
        detectRefusals?: boolean;
        injectionProbability?: number;
        maxTokens?: number;
        metapromptModel?: LanguageModel;
        metapromptTemperature?: number;
        metapromptTemplate?: string;
        model?: LanguageModel;
        scoreResponses?: boolean;
        strategy: RedTeamStrategy;
        successConfirmTurns?: number;
        successScore?: number;
        target: string;
        techniques?: AttackTechnique[];
        temperature?: number;
        totalTurns?: number;
    }
    Index

    Properties

    attackPlan?: string
    detectRefusals?: boolean

    Use pattern-based refusal detection to skip the LLM scorer on obvious refusals. Default true.

    injectionProbability?: number

    Probability (0.0-1.0) of applying a random encoding technique per turn. Default 0.0 (off).

    maxTokens?: number
    metapromptModel?: LanguageModel
    metapromptTemperature?: number

    Separate temperature for metaprompt/scoring calls. Defaults to the value of temperature.

    metapromptTemplate?: string
    model?: LanguageModel
    scoreResponses?: boolean

    Score target responses each turn to feed back into the attacker. Default true.

    strategy: RedTeamStrategy
    successConfirmTurns?: number

    Number of consecutive turns scoring at or above the successScore threshold required before triggering early exit. Default 2.

    successScore?: number

    Score threshold (0-10) for early exit. Default 9. Set to undefined to disable early exit.

    target: string
    techniques?: AttackTechnique[]

    List of AttackTechnique instances to sample from. Defaults to all built-ins.

    temperature?: number
    totalTurns?: number