# animily_music/generate_music.py

"""Animily Music - ACE-Step music generation.

Generates 12 segments (~595 seconds each), then joins them with crossfades
into a single track of roughly 119 minutes.
"""
import json
import os
import subprocess
import time
import urllib.parse

import requests

from config import (
    ACESTEP_URL, OUTPUT_DIR, PROMPTS_DIR,
    SEGMENT_DURATION, SEGMENTS_FOR_2H, CROSSFADE_SEC, BATCH_SIZE,
)


def _wait_for_acestep(timeout=120):
    """Wait until the ACE-Step API server passes its health check.

    Polls ``{ACESTEP_URL}/health`` about once per second until it answers
    with HTTP 200 or roughly ``timeout`` seconds have elapsed.

    Args:
        timeout: Maximum time to wait, in seconds. A value <= 0 performs no
            request at all.

    Returns:
        True if the server answered with HTTP 200 in time, False otherwise.
    """
    # A monotonic deadline makes slow (timed-out) requests count against the
    # budget; counting loop iterations could stretch the wait to several
    # times ``timeout`` because each attempt may block for up to 3 seconds.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            r = requests.get(f"{ACESTEP_URL}/health", timeout=3)
        except requests.RequestException:
            # Server not reachable yet; keep polling until the deadline.
            pass
        else:
            if r.status_code == 200:
                return True
        time.sleep(1)
    return False


def _submit_task(caption, bpm, keyscale, duration):
    """Submit an instrumental generation task to ACE-Step.

    Posts the request to ``{ACESTEP_URL}/release_task`` and returns the task
    id found under ``data.task_id`` in the JSON response.

    Args:
        caption: Text sent as the ``caption`` field.
        bpm: Value sent as the ``bpm`` field.
        keyscale: Value sent as the ``keyscale`` field.
        duration: Value sent as the ``duration`` field
            (presumably seconds; confirm against the ACE-Step API).

    Returns:
        The task id reported by the server.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the response does not contain a task id.
    """
    payload = {
        "think": True,
        "caption": caption,
        "lyrics": "[Instrumental]",
        "bpm": bpm,
        "duration": duration,
        "keyscale": keyscale,
        "language": "instrumental",
        "timesignature": "4",
        "batch_size": BATCH_SIZE,
    }
    r = requests.post(f"{ACESTEP_URL}/release_task", json=payload, timeout=30)
    r.raise_for_status()
    data = r.json()

    # "data" may be absent, null, or not an object; all of these mean "no
    # task id" and must surface as the RuntimeError below rather than as an
    # AttributeError from chaining .get() on None.
    inner = data.get("data") if isinstance(data, dict) else None
    task_id = inner.get("task_id") if isinstance(inner, dict) else None
    if not task_id:
        raise RuntimeError(f"task_id를 받지 못했습니다: {data}")
    return task_id


def _extract_file_url(item):
    """Return the ``file`` value of the first result in a completed task item.

    ``item["result"]`` is either a JSON-encoded string or an already decoded
    list; a string that is not valid JSON is treated as an empty result.

    Raises:
        RuntimeError: If no usable ``file`` entry is present.
    """
    raw = item.get("result", "[]")
    if isinstance(raw, str):
        try:
            results = json.loads(raw)
        except json.JSONDecodeError:
            results = []
    else:
        results = raw

    if results and isinstance(results, list) and isinstance(results[0], dict):
        file_url = results[0].get("file", "")
        if file_url:
            return file_url
    raise RuntimeError(f"완료됐으나 오디오 경로 없음: {item}")


def _poll_result(task_id, timeout=900):
    """Wait for a task to finish and return the URL of its audio file.

    Queries ``{ACESTEP_URL}/query_result`` every 10 seconds. Status ``1`` is
    treated as completed and status ``2`` as failed; any other status keeps
    polling. Network errors and non-JSON responses are treated as transient.

    Args:
        task_id: Id returned when the task was submitted.
        timeout: Maximum time to wait, in seconds.

    Returns:
        The ``file`` value of the first result of the completed task.

    Raises:
        RuntimeError: If the task failed, or completed without an audio path.
        TimeoutError: If the task did not finish within ``timeout`` seconds.
    """
    poll_interval = 10
    # Monotonic clock: immune to system clock adjustments during long waits.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        try:
            r = requests.post(
                f"{ACESTEP_URL}/query_result",
                json={"task_id_list": [task_id]},
                timeout=10,
            )
            body = r.json()
        except (requests.RequestException, ValueError):
            # Connection problems or a non-JSON body: retry on the next tick.
            # ValueError covers JSON decode errors raised by requests versions
            # whose decode error is not a RequestException.
            time.sleep(poll_interval)
            continue

        items = body.get("data") if isinstance(body, dict) else None
        if not isinstance(items, list) or not items:
            time.sleep(poll_interval)
            continue

        item = items[0]
        status = item.get("status")

        if status == 1:  # completed
            return _extract_file_url(item)
        if status == 2:  # failed/timeout
            raise RuntimeError(f"생성 실패: {item}")

        # Any other status (e.g. 0: running) means keep waiting.
        time.sleep(poll_interval)

    raise TimeoutError(f"task {task_id} 타임아웃 ({timeout}초)")


def _download_audio(file_url, output_path):
    """Download a generated audio file from ACE-Step to ``output_path``.

    ``file_url`` may take three forms:
      * a server-relative URL starting with ``/``
        (e.g. ``/v1/audio?path=%2Fhome%2F...``), prefixed with ACESTEP_URL;
      * an absolute URL starting with ``http``, used as-is;
      * anything else, passed as the ``path`` query value of ``/v1/audio``.

    Args:
        file_url: Location of the audio as reported by the server.
        output_path: Local path the audio is written to.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the downloaded file is smaller than 1000 bytes.
    """
    if file_url.startswith("/"):
        download_url = f"{ACESTEP_URL}{file_url}"
    elif file_url.startswith("http"):
        download_url = file_url
    else:
        download_url = f"{ACESTEP_URL}/v1/audio?path={urllib.parse.quote(file_url)}"

    # Stream to disk in chunks so a long segment is never held fully in
    # memory; the context manager releases the connection when done.
    with requests.get(download_url, timeout=120, stream=True) as r:
        r.raise_for_status()
        try:
            with open(output_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        f.write(chunk)
        except Exception:
            # Do not leave a truncated file behind for later steps to pick up.
            if os.path.exists(output_path):
                os.remove(output_path)
            raise

    size = os.path.getsize(output_path)
    if size < 1000:
        # Too small to be real audio; remove it so no junk file lingers.
        os.remove(output_path)
        raise RuntimeError(f"다운로드 실패 또는 빈 파일: {output_path}")
    print(f"    다운로드: {size // 1024}KB")

def _crossfade_segments(segment_paths, output_path, crossfade_ms=5000):
    """Join audio segments with ffmpeg crossfades and a final fade-out.

    Segments are merged pairwise, left to right, into intermediate
    ``_merge_<i>.wav`` files inside OUTPUT_DIR. If the ``acrossfade`` filter
    fails for a pair, a plain concatenation is used for that pair instead.
    The merged result gets a 5-second fade-out and is written to
    ``output_path`` as 16-bit PCM. A single segment is copied unchanged.

    Args:
        segment_paths: Ordered list of audio file paths to join.
        output_path: Destination path of the final audio file.
        crossfade_ms: Crossfade length between segments, in milliseconds.

    Raises:
        ValueError: If ``segment_paths`` is empty.
        RuntimeError: If a pair cannot be merged even by plain concatenation,
            or if the final fade-out step fails.
    """
    import shutil  # local import: only needed for the single-segment copy

    if not segment_paths:
        raise ValueError("세그먼트가 없습니다")

    if len(segment_paths) == 1:
        # Portable replacement for shelling out to `cp`.
        shutil.copyfile(segment_paths[0], output_path)
        return

    merge_prefix = os.path.join(OUTPUT_DIR, "_merge_")
    # True division keeps sub-second crossfades; ":g" renders 5.0 as "5".
    crossfade_sec = crossfade_ms / 1000
    current = segment_paths[0]

    for i, next_seg in enumerate(segment_paths[1:], 1):
        tmp_out = f"{merge_prefix}{i}.wav"
        cmd = [
            "ffmpeg", "-y",
            "-i", current,
            "-i", next_seg,
            "-filter_complex",
            f"acrossfade=d={crossfade_sec:g}:c1=tri:c2=tri",
            "-c:a", "pcm_s16le",
            tmp_out,
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            # Fallback: plain concatenation without a crossfade.
            cmd_concat = [
                "ffmpeg", "-y",
                "-i", current,
                "-i", next_seg,
                "-filter_complex", "[0:a][1:a]concat=n=2:v=0:a=1[out]",
                "-map", "[out]", "-c:a", "pcm_s16le",
                tmp_out,
            ]
            result = subprocess.run(cmd_concat, capture_output=True, text=True)

        # The previous intermediate file is no longer needed either way.
        if current.startswith(merge_prefix):
            os.remove(current)

        if result.returncode != 0:
            # Both attempts failed; continuing would silently yield no output.
            raise RuntimeError(
                f"세그먼트 병합 실패 ({i}/{len(segment_paths) - 1}): "
                f"{result.stderr.strip()[-500:]}"
            )
        current = tmp_out

    # Final fade-out over the last 5 seconds of the merged audio.
    probe = subprocess.run(
        ["ffprobe", "-v", "quiet", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", current],
        capture_output=True, text=True,
    )
    try:
        total_dur = float(probe.stdout.strip())
        fade_start = max(0, total_dur - 5)
    except ValueError:
        # ffprobe gave no parsable duration; use the fixed fallback start.
        # NOTE(review): 7100 s presumably targets the ~2 h track — confirm.
        fade_start = 7100

    cmd_fade = [
        "ffmpeg", "-y", "-i", current,
        "-af", f"afade=t=out:st={fade_start}:d=5",
        "-c:a", "pcm_s16le", output_path,
    ]
    faded = subprocess.run(cmd_fade, capture_output=True, text=True)
    if current.startswith(merge_prefix):
        os.remove(current)
    if faded.returncode != 0:
        raise RuntimeError(f"페이드아웃 실패: {faded.stderr.strip()[-500:]}")

    print(f"  [MUSIC] 최종 음악 완성: {output_path}")


def _load_prompts(animal_type):
    """Load the prompt definitions for an animal type.

    Reads ``<PROMPTS_DIR>/<animal_type>_music.json``.

    Args:
        animal_type: Prefix of the prompt file name (e.g. "dog" or "cat").

    Returns:
        The decoded JSON content of the prompt file.
    """
    path = os.path.join(PROMPTS_DIR, f"{animal_type}_music.json")
    # JSON is UTF-8 by specification; do not rely on the locale's default
    # encoding, which differs across platforms.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def generate_2h_music(animal_type="dog", style_index=0, *, min_segments=6):
    """Generate the long-form music track for an animal type.

    Generates SEGMENTS_FOR_2H segments one after another via ACE-Step, using
    the style selected from the prompt file, then joins the successful ones
    with crossfades. A segment that fails is logged and skipped. The
    per-segment files are deleted once the merged file has been produced.

    Args:
        animal_type: Which prompt file to use (e.g. "dog" or "cat").
        style_index: Index into the prompt file's ``styles`` list; wraps
            around when it exceeds the number of styles.
        min_segments: Minimum number of successfully generated segments
            required to build the final track.

    Returns:
        str: Path of the final music file.

    Raises:
        ValueError: If the prompt file defines no styles.
        RuntimeError: If the ACE-Step server is unreachable, fewer than
            ``min_segments`` segments succeed, or the merged file is missing.
    """
    prompts = _load_prompts(animal_type)
    styles = prompts["styles"]
    if not styles:
        # Avoid an opaque ZeroDivisionError from the modulo below.
        raise ValueError(f"{animal_type} 프롬프트에 스타일이 없습니다")
    style = styles[style_index % len(styles)]

    base_caption = style["caption"]
    bpm = style["bpm"]
    keyscale = style["keyscale"]
    variations = style.get("variations", [])

    print(f"\n{'='*60}")
    print(f"[MUSIC] {animal_type} 2시간 음악 생성 ({SEGMENTS_FOR_2H}세그먼트)")
    print(f"  Style: {style['name']}, BPM: {bpm}, Key: {keyscale}")
    print(f"{'='*60}\n")

    if not _wait_for_acestep():
        raise RuntimeError("ACE-Step API 서버에 연결할 수 없습니다")

    # Make sure downloads have somewhere to go before any generation time
    # is spent.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    segment_paths = []
    for i in range(SEGMENTS_FOR_2H):
        seg_path = os.path.join(OUTPUT_DIR, f"seg_{animal_type}_{i:02d}.wav")

        # Cycle through the style's variations so segments differ.
        if variations:
            var = variations[i % len(variations)]
            caption = f"{base_caption} {var}"
        else:
            caption = base_caption

        print(f"  [SEG {i+1}/{SEGMENTS_FOR_2H}] 생성 중...", flush=True)
        t0 = time.time()

        try:
            task_id = _submit_task(caption, bpm, keyscale, SEGMENT_DURATION)
            file_url = _poll_result(task_id, timeout=900)
            _download_audio(file_url, seg_path)
        except Exception as e:
            # Best effort: one failed segment must not abort the whole run.
            print(f"  [SEG {i+1}] 실패: {e}")
            continue

        elapsed = time.time() - t0
        print(f"  [SEG {i+1}/{SEGMENTS_FOR_2H}] 완료 ({elapsed:.0f}초)")
        segment_paths.append(seg_path)

    if len(segment_paths) < min_segments:
        raise RuntimeError(
            f"세그먼트 {len(segment_paths)}개만 성공 — 최소 {min_segments}개 필요"
        )

    output_path = os.path.join(OUTPUT_DIR, f"music_{animal_type}_2h.wav")
    _crossfade_segments(segment_paths, output_path, CROSSFADE_SEC * 1000)

    # Only discard the source segments once the merged file really exists;
    # otherwise a failed merge would throw away all the generated audio.
    if not os.path.isfile(output_path) or os.path.getsize(output_path) == 0:
        raise RuntimeError(f"최종 음악 파일 생성 실패: {output_path}")

    for p in segment_paths:
        if os.path.exists(p):
            os.remove(p)

    return output_path


if __name__ == "__main__":
    import sys

    # The optional first CLI argument selects the animal type; "dog" is used
    # when no argument is given.
    cli_args = sys.argv[1:]
    music_path = generate_2h_music(cli_args[0] if cli_args else "dog")
    print(f"\n완료: {music_path}")