Module scenario.voice.effects.prosody

Prosody effects: volume scaling and time-stretching speech.

Expand source code
"""Prosody effects: volume scaling and time-stretching speech."""

from __future__ import annotations

import numpy as np

from ._common import EffectFn, np_to_pcm16, pcm16_to_np


def low_volume(factor: float = 0.5) -> EffectFn:
    """Build an effect that attenuates audio by ``factor`` (0 < factor <= 1).

    Args:
        factor: Amplitude multiplier in the half-open range ``(0, 1]``.
            A value of ``1`` leaves the amplitude unchanged.

    Returns:
        A callable that takes audio bytes, decodes them with ``pcm16_to_np``,
        multiplies the samples by ``factor`` and re-encodes them with
        ``np_to_pcm16``.

    Raises:
        ValueError: If ``factor`` is not in ``(0, 1]``.
    """
    if factor <= 0:
        raise ValueError("low_volume factor must be > 0")
    # Enforce the documented upper bound so this effect can never amplify,
    # mirroring the range check performed by high_volume.
    if factor > 1:
        raise ValueError("low_volume factor must be <= 1")

    def _apply(audio: bytes) -> bytes:
        # Scale in float32 so the multiplication is not done in integer space.
        arr = pcm16_to_np(audio).astype(np.float32) * factor
        return np_to_pcm16(arr)

    return _apply


def high_volume(factor: float = 1.5) -> EffectFn:
    """Build an effect that amplifies audio by ``factor`` (factor >= 1).

    The returned callable decodes the audio bytes with ``pcm16_to_np``,
    multiplies the samples by ``factor`` in float32 and re-encodes the result
    with ``np_to_pcm16``. Samples pushed beyond the int16 range are clipped.

    Raises:
        ValueError: If ``factor`` is below 1.
    """
    if factor < 1:
        raise ValueError("high_volume factor must be >= 1")

    def _apply(audio: bytes) -> bytes:
        samples = pcm16_to_np(audio)
        boosted = samples.astype(np.float32) * factor
        return np_to_pcm16(boosted)

    return _apply


def speaking_fast(factor: float = 1.3) -> EffectFn:
    """
    Build an effect that shortens audio so speech sounds faster (factor > 1).

    The work is delegated to ``_resample_factor``, which resamples the signal
    to roughly ``len / factor`` samples. Because the samples themselves are
    resampled, pitch rises along with the tempo.

    The source table asks for "time-stretch without pitch change"; doing that
    properly needs a phase vocoder. This version deliberately stays simple and
    dependency-free, accepting the pitch shift as a documented tradeoff.

    Raises:
        ValueError: If ``factor`` is less than or equal to 1.
    """
    not_faster = factor <= 1
    if not_faster:
        raise ValueError("speaking_fast factor must be > 1")

    effect = _resample_factor(factor)
    return effect


def speaking_slow(factor: float = 0.7) -> EffectFn:
    """Build an effect that lengthens audio so speech sounds slower (0 < factor < 1).

    The work is delegated to ``_resample_factor``, which resamples the signal
    to roughly ``len / factor`` samples. Same pitch tradeoff as speaking_fast.

    Args:
        factor: Speed multiplier; must be strictly between 0 and 1.

    Raises:
        ValueError: If ``factor`` is not strictly between 0 and 1.
    """
    if factor >= 1:
        raise ValueError("speaking_slow factor must be < 1")
    # A zero factor would divide by zero when the effect runs, and a negative
    # one would collapse the audio to a single sample, so reject both here.
    # Written as ``not factor > 0`` so NaN is rejected as well.
    if not factor > 0:
        raise ValueError("speaking_slow factor must be > 0")
    return _resample_factor(factor)


def _resample_factor(factor: float) -> EffectFn:
    def _apply(audio: bytes) -> bytes:
        arr = pcm16_to_np(audio)
        if len(arr) == 0:
            return audio
        new_len = max(1, int(round(len(arr) / factor)))
        idx = np.linspace(0, len(arr) - 1, new_len).astype(np.int64)
        return np_to_pcm16(arr[idx])

    return _apply

Functions

def high_volume(factor: float = 1.5) ‑> Callable[[bytes], bytes]

Scale amplitude up by factor (>= 1). Clips at int16 bounds.

Expand source code
def high_volume(factor: float = 1.5) -> EffectFn:
    """Build an effect that amplifies audio by ``factor`` (factor >= 1).

    The returned callable decodes the audio bytes with ``pcm16_to_np``,
    multiplies the samples by ``factor`` in float32 and re-encodes the result
    with ``np_to_pcm16``. Samples pushed beyond the int16 range are clipped.

    Raises:
        ValueError: If ``factor`` is below 1.
    """
    if factor < 1:
        raise ValueError("high_volume factor must be >= 1")

    def _apply(audio: bytes) -> bytes:
        samples = pcm16_to_np(audio)
        boosted = samples.astype(np.float32) * factor
        return np_to_pcm16(boosted)

    return _apply
def low_volume(factor: float = 0.5) ‑> Callable[[bytes], bytes]

Scale amplitude down by factor (0 < factor <= 1).

Expand source code
def low_volume(factor: float = 0.5) -> EffectFn:
    """Build an effect that attenuates audio by ``factor`` (0 < factor <= 1).

    Args:
        factor: Amplitude multiplier in the half-open range ``(0, 1]``.
            A value of ``1`` leaves the amplitude unchanged.

    Returns:
        A callable that takes audio bytes, decodes them with ``pcm16_to_np``,
        multiplies the samples by ``factor`` and re-encodes them with
        ``np_to_pcm16``.

    Raises:
        ValueError: If ``factor`` is not in ``(0, 1]``.
    """
    if factor <= 0:
        raise ValueError("low_volume factor must be > 0")
    # Enforce the documented upper bound so this effect can never amplify,
    # mirroring the range check performed by high_volume.
    if factor > 1:
        raise ValueError("low_volume factor must be <= 1")

    def _apply(audio: bytes) -> bytes:
        # Scale in float32 so the multiplication is not done in integer space.
        arr = pcm16_to_np(audio).astype(np.float32) * factor
        return np_to_pcm16(arr)

    return _apply
def speaking_fast(factor: float = 1.3) ‑> Callable[[bytes], bytes]

Time-stretch to speak faster (factor > 1). Linear resample, so pitch shifts.

The source table says "time-stretch without pitch change" — true pitch-preserving stretching needs a phase vocoder. This implementation prioritises simplicity and zero extra dependencies; the pitch shift is a documented tradeoff.

Expand source code
def speaking_fast(factor: float = 1.3) -> EffectFn:
    """
    Build an effect that shortens audio so speech sounds faster (factor > 1).

    The work is delegated to ``_resample_factor``, which resamples the signal
    to roughly ``len / factor`` samples. Because the samples themselves are
    resampled, pitch rises along with the tempo.

    The source table asks for "time-stretch without pitch change"; doing that
    properly needs a phase vocoder. This version deliberately stays simple and
    dependency-free, accepting the pitch shift as a documented tradeoff.

    Raises:
        ValueError: If ``factor`` is less than or equal to 1.
    """
    not_faster = factor <= 1
    if not_faster:
        raise ValueError("speaking_fast factor must be > 1")

    effect = _resample_factor(factor)
    return effect
def speaking_slow(factor: float = 0.7) ‑> Callable[[bytes], bytes]

Time-stretch to speak slower (factor < 1). Same pitch tradeoff as speaking_fast.

Expand source code
def speaking_slow(factor: float = 0.7) -> EffectFn:
    """Build an effect that lengthens audio so speech sounds slower (0 < factor < 1).

    The work is delegated to ``_resample_factor``, which resamples the signal
    to roughly ``len / factor`` samples. Same pitch tradeoff as speaking_fast.

    Args:
        factor: Speed multiplier; must be strictly between 0 and 1.

    Raises:
        ValueError: If ``factor`` is not strictly between 0 and 1.
    """
    if factor >= 1:
        raise ValueError("speaking_slow factor must be < 1")
    # A zero factor would divide by zero when the effect runs, and a negative
    # one would collapse the audio to a single sample, so reject both here.
    # Written as ``not factor > 0`` so NaN is rejected as well.
    if not factor > 0:
        raise ValueError("speaking_slow factor must be > 0")
    return _resample_factor(factor)