# if-emotion-ux/backend/claude_api_server.py

#!/usr/bin/env python3
"""
Claude Max API Server - OpenAI-compatible endpoint for if.emotion frontend

Bridges the React frontend to claude-code CLI using Max subscription.
Based on: https://idsc2025.substack.com/p/how-i-built-claude_max-to-unlock

Usage:
    python claude_api_server.py [--port 3001]
"""

import os
import sys
import json
import subprocess
import asyncio
from pathlib import Path
from datetime import datetime
from typing import Generator
import uuid

# Add infrafabric tools to path
# NOTE(review): hard-coded absolute path; no import from it is visible in this
# part of the file — confirm it is still needed.
sys.path.insert(0, '/home/setup/infrafabric/tools')

# Flask and flask-cors are third-party. If either import fails, install both
# with pip for the running interpreter and retry the import once. The pip call
# is not checked, so a failed install surfaces as ImportError on the retry.
try:
    from flask import Flask, request, Response, jsonify
    from flask_cors import CORS
except ImportError:
    print("Installing required packages...")
    subprocess.run([sys.executable, "-m", "pip", "install", "flask", "flask-cors", "-q"])
    from flask import Flask, request, Response, jsonify
    from flask_cors import CORS

# Single application object; every route in this module is registered on it.
app = Flask(__name__)
# No origin restrictions are passed here, so flask-cors' defaults apply.
CORS(app)  # Enable CORS for frontend

# Configuration
# Both paths are resolved relative to the home directory of the user running
# the server.
CLAUDE_CLI = Path.home() / ".local/bin/claude"  # executable invoked by call_claude_cli()
CREDENTIALS_FILE = Path.home() / ".claude/.credentials.json"  # read by load_credentials()

# Sergio Personality System Prompt
# chat_completions() prepends this text, prefixed with "System: ", to every
# prompt it builds; system messages sent by the client are appended after it
# rather than replacing it. The text is sent to the model verbatim, so any
# edit here changes runtime behavior.
SERGIO_SYSTEM_PROMPT = """You are Sergio, a Spanish-speaking psychologist who specializes in debunking abstract spiritual language and operationalizing emotional concepts into concrete, testable frameworks.

CORE PERSONALITY:
- Direct and brash communication style - challenge unfalsifiable claims
- Systems-first thinker (context/culture factors, not individual blame)
- Bilingual Spanish/English code-switching is natural for you
- Convert abstract emotional language into concrete operational definitions
- Apply the Identity=Interaction framework: identity emerges from relational patterns, not fixed essence

VOICE GUIDELINES (MANDATORY):
- NEVER use bullet points or numbered lists - always narrative flow
- Variable sentence length pattern: short punchy + longer flowing explanation + short again
- Validate emotional reality FIRST, then challenge interpretation
- End with concrete operationalization: "What specific behavior in the next 30 minutes?"

RHETORICAL TOOLS:
- Aspiradora metaphor: When someone drowns in complexity, simplify to binary. "Una aspiradora no necesita 50 tipos de suciedad etiquetados. It needs one question: Is there dirt? Yes or no?"
- Reframing: "The problem isn't X. The problem is Y."
- Pattern exposure: "Here's what actually happens..."
- Counterexample testing: "What would falsify that belief?"

SPANISH USAGE:
- Use Spanish for emotional validation: "Mira, eso no está mal"
- Use Spanish for cultural concepts: vínculos, vergüenza ajena, sobremadre
- Use colloquial markers: tío, vale, pues, mira
- NEVER use formal Spanish: no obstante, asimismo, consecuentemente

ANTI-PATTERNS (NEVER DO):
- Never pathologize neurodivergence - frame as context mismatch, not deficit
- Never use "Furthermore", "In conclusion", "One could argue"
- Never create equal-length paragraphs
- Never give prescriptions without mechanism explanations

EXAMPLE RESPONSE STRUCTURE:
Hook (challenge assumption) → Narrative (explain mechanism) → Operationalization (concrete action) → Provocation (opening question)
"""

def load_credentials():
    """Load the Claude Max credentials file, if it is usable.

    Returns:
        The parsed JSON content of ``CREDENTIALS_FILE``, or ``None`` when the
        file is missing, unreadable, or does not contain valid JSON.
    """
    # EAFP: open directly instead of exists()-then-open, which is racy and
    # still fails for directories or permission problems.
    try:
        with open(CREDENTIALS_FILE, encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # OSError covers missing/unreadable files. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError. Callers already treat
        # None as "no credentials", so a corrupt file must not crash them.
        return None

def call_claude_cli(prompt: str, stream: bool = False) -> Generator[str, None, None]:
    """
    Call Claude CLI using Max subscription authentication.

    Key insight from Arthur Collé's article:
    - Remove ANTHROPIC_API_KEY to force OAuth auth
    - CLI falls back to subscription credentials

    Args:
        prompt: Full prompt text, passed to the CLI as the argument following
            ``--print``.
        stream: When true, yield the CLI's stdout line by line as it is
            produced; otherwise run to completion (300 s timeout) and yield
            the whole stdout once.

    Yields:
        Chunks of CLI output. Failures are never raised to the caller; they
        are reported in-band as a ``"[Error: ...]"`` chunk (timeout, failure
        to start the CLI, or a non-zero exit code).

    Note:
        Streaming mode has no timeout; it ends when the CLI closes stdout or
        the consumer closes the generator.
    """
    env = os.environ.copy()

    # Remove API key to force subscription auth
    env.pop("ANTHROPIC_API_KEY", None)

    # Force subscription mode
    env["CLAUDE_USE_SUBSCRIPTION"] = "true"

    command = [str(CLAUDE_CLI), "--print", prompt]

    if not stream:
        # Non-streaming mode
        try:
            result = subprocess.run(
                command,
                env=env,
                capture_output=True,
                text=True,
                encoding="utf-8",
                errors="replace",
                timeout=300
            )
        except subprocess.TimeoutExpired:
            yield "[Error: Request timed out after 300s]"
            return
        except Exception as e:
            yield f"[Error: {str(e)}]"
            return

        yield result.stdout
        if result.returncode != 0:
            # Surface CLI failures instead of silently returning empty text.
            detail = result.stderr.strip() or "no error output"
            yield f"[Error: claude CLI exited with code {result.returncode}: {detail}]"
        return

    # Streaming mode
    process = None
    try:
        process = subprocess.Popen(
            command,
            env=env,
            stdout=subprocess.PIPE,
            # stderr is never consumed while streaming; an unread PIPE can
            # fill up and block the child forever, so discard it instead.
            stderr=subprocess.DEVNULL,
            text=True,
            encoding="utf-8",
            errors="replace",
            bufsize=1
        )

        for line in process.stdout:
            yield line

        returncode = process.wait()
        if returncode != 0:
            yield f"[Error: claude CLI exited with code {returncode}]"
    except Exception as e:
        yield f"[Error: {str(e)}]"
    finally:
        # Also reached when the consumer closes the generator early (e.g. the
        # HTTP client disconnected): stop and reap the child so it does not
        # linger, and release the pipe.
        if process is not None:
            if process.poll() is None:
                process.kill()
            process.stdout.close()
            process.wait()

@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint.

    Returns:
        JSON with a fixed ``status``/``service`` pair, the subscription type
        read from the credentials file (``None`` when unavailable), and the
        configured CLI path plus whether that path exists.
    """
    creds = load_credentials()

    # The credentials file is external input: tolerate a non-object document
    # and a null/non-object "claudeAiOauth" entry instead of raising
    # AttributeError (which would turn the health check into a 500).
    oauth = creds.get("claudeAiOauth") if isinstance(creds, dict) else None
    subscription_type = oauth.get("subscriptionType") if isinstance(oauth, dict) else None

    return jsonify({
        "status": "healthy",
        "service": "claude-max-api",
        "subscription_type": subscription_type,
        "cli_path": str(CLAUDE_CLI),
        "cli_exists": CLAUDE_CLI.exists()
    })

@app.route('/v1/models', methods=['GET'])
def list_models():
    """Return the static model catalog in OpenAI ``/v1/models`` list format."""

    def stamp():
        # Each entry reports the current time as its creation timestamp.
        return int(datetime.now().timestamp())

    # The primary model carries the extra permission/root/parent fields.
    catalog = [{
        "id": "claude-max",
        "object": "model",
        "created": stamp(),
        "owned_by": "anthropic",
        "permission": [],
        "root": "claude-max",
        "parent": None
    }]

    # The remaining ids are exposed with the minimal field set.
    for alias in ("claude-sonnet-4", "claude-opus-4"):
        catalog.append({
            "id": alias,
            "object": "model",
            "created": stamp(),
            "owned_by": "anthropic"
        })

    return jsonify({"object": "list", "data": catalog})

def _message_text(content):
    """Flatten an OpenAI-style message ``content`` value into plain text.

    Accepts a string, ``None`` (rendered as empty text), or a list of content
    parts, from which string items and ``{"type": "text", "text": ...}`` items
    are kept and joined with newlines. Anything else is stringified.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and part.get("type", "text") == "text":
                pieces.append(str(part.get("text", "")))
        return "\n".join(pieces)
    return str(content)


def _build_prompt(messages):
    """Render chat messages as one transcript that starts with the Sergio system prompt."""
    # Inject Sergio system prompt at the beginning
    prompt_parts = [f"System: {SERGIO_SYSTEM_PROMPT}"]

    # Build prompt from messages (any existing system prompts are additive)
    for msg in messages:
        if not isinstance(msg, dict):
            continue  # ignore malformed entries rather than crash the request
        role = msg.get('role', 'user')
        text = _message_text(msg.get('content', ''))
        if role == 'system':
            label = "System"  # Additional system instructions
        elif role == 'assistant':
            label = "Assistant"
        else:
            label = "Human"
        prompt_parts.append(f"{label}: {text}")

    return "\n\n".join(prompt_parts)


@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """OpenAI-compatible chat completions endpoint with Sergio personality.

    Reads ``messages``, ``stream`` and ``model`` from the JSON request body,
    builds a single text prompt, and forwards it to the Claude CLI.

    Returns:
        * ``stream`` truthy: a ``text/event-stream`` response of
          ``chat.completion.chunk`` events, a final chunk with
          ``finish_reason="stop"``, then ``data: [DONE]``.
        * otherwise: a ``chat.completion`` JSON object. ``usage`` is a rough
          estimate (characters // 4), not a real token count.
        * HTTP 400 with an OpenAI-style ``error`` object when the body is not
          a JSON object or ``messages`` is not a list.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body, so every bad-input case is answered with one consistent 400.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        return jsonify({"error": {
            "message": "Request body must be a JSON object",
            "type": "invalid_request_error"
        }}), 400

    messages = payload.get('messages') or []
    if not isinstance(messages, list):
        return jsonify({"error": {
            "message": "'messages' must be a list",
            "type": "invalid_request_error"
        }}), 400

    stream = payload.get('stream', False)
    model = payload.get('model', 'claude-max')

    prompt = _build_prompt(messages)

    if stream:
        def generate():
            response_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
            created = int(datetime.now().timestamp())

            for chunk in call_claude_cli(prompt, stream=True):
                # SSE format
                event = {
                    "id": response_id,
                    "object": "chat.completion.chunk",
                    "created": created,
                    "model": model,
                    "choices": [{
                        "index": 0,
                        "delta": {"content": chunk},
                        "finish_reason": None
                    }]
                }
                yield f"data: {json.dumps(event)}\n\n"

            # Final chunk
            final = {
                "id": response_id,
                "object": "chat.completion.chunk",
                "created": created,
                "model": model,
                "choices": [{
                    "index": 0,
                    "delta": {},
                    "finish_reason": "stop"
                }]
            }
            yield f"data: {json.dumps(final)}\n\n"
            yield "data: [DONE]\n\n"

        return Response(generate(), mimetype='text/event-stream')

    # Non-streaming response
    response_text = "".join(call_claude_cli(prompt, stream=False))

    return jsonify({
        "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
        "object": "chat.completion",
        "created": int(datetime.now().timestamp()),
        "model": model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": response_text.strip()
            },
            "finish_reason": "stop"
        }],
        "usage": {
            "prompt_tokens": len(prompt) // 4,
            "completion_tokens": len(response_text) // 4,
            "total_tokens": (len(prompt) + len(response_text)) // 4
        }
    })

@app.route('/api/chat/completions', methods=['POST'])
def api_chat_completions():
    """Open WebUI-style alias: delegate to chat_completions() and return its result unchanged."""
    delegated = chat_completions()
    return delegated

if __name__ == '__main__':
    # Script entry point: parse CLI options, print a banner with the endpoint
    # URLs, then start Flask's built-in server.
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=3001)
    parser.add_argument('--host', default='0.0.0.0')
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. With the default
    # host of 0.0.0.0 that means the whole network, so it must be opt-in.
    parser.add_argument(
        '--debug',
        action='store_true',
        help='enable Flask debug mode (reloader + interactive debugger); '
             'never use on a network-reachable host'
    )
    args = parser.parse_args()

    print(f"""
╔═══════════════════════════════════════════════════════════════╗
║  Claude Max API Server                                        ║
║  Backend for if.emotion using Max subscription               ║
╠═══════════════════════════════════════════════════════════════╣
║  Endpoint: http://{args.host}:{args.port}/v1/chat/completions          ║
║  Health:   http://{args.host}:{args.port}/health                       ║
║  Models:   http://{args.host}:{args.port}/v1/models                    ║
╠═══════════════════════════════════════════════════════════════╣
║  Based on: idsc2025.substack.com/p/how-i-built-claude_max     ║
╚═══════════════════════════════════════════════════════════════╝
    """)

    # threaded=True lets one request proceed while another waits on the CLI.
    app.run(host=args.host, port=args.port, debug=args.debug, threaded=True)