---
┌─────────────────────────────────────────────────────────────┐
│ MacStudio M3 Ultra (本地) │
│ 128GB统一内存 │
├─────────────────────────────────────────────────────────────┤
│ ┌─────────────────────────────────────────────────────────┐│
│ │ Ollama / llama.cpp 模型服务 ││
│ │ ┌──────────────┐ ┌──────────────┐ ┌─────────────┐ ││
│ │ │DeepSeek-R1-32B│ │ Qwen2.5-72B │ │ 其他模型 │ ││
│ │ │ (量化版) │ │ (量化版) │ │ │ ││
│ │ └──────────────┘ └──────────────┘ └─────────────┘ ││
│ │ ↑ ↑ ││
│ │ └────────────────────┘ ││
│ │ 本地推理 (优先) ││
│ └─────────────────────────────────────────────────────────┘│
└─────────────────────────────────────────────────────────────┘
│
│ HTTP API
│
┌─────────────────────────────────────────────────────────────┐
│ 阿里云服务器 47.96.253.194 │
├─────────────────────────────────────────────────────────────┤
│ ┌─────────────────────────────────────────────────────────┐│
│ │ SaaS Core (FastAPI) ││
│ │ ┌──────────────┐ ┌──────────────┐ ││
│ │ │ Agent队列 │ │ 路由决策 │ ││
│ │ │ (Redis) │ │ 引擎 │ ││
│ │ └──────────────┘ └──────────────┘ ││
│ │ │ ││
│ │ ┌────────┴────────┐ ││
│ │ ↓ ↓ ││
│ │ ┌──────────┐ ┌──────────┐ ││
│ │ │ 本地模型 │ │ API调用 │ ││
│ │ │(MacStudio)│ │(DeepSeek)│ ││
│ │ │(Qwen7B) │ │(MiniMax) │ ││
│ │ │(Kimi) │ │(Kimi) │ ││
│ │ └──────────┘ └──────────┘ ││
│ │ ↑ ↑ ││
│ │ └─────────────────┘ ││
│ │ 结果合并 ││
│ └─────────────────────────────────────────────────────────┘│
└─────────────────────────────────────────────────────────────┘
| 任务类型 | 复杂度 | 路由策略 | 模型选择 | 响应时间 |
|---|---|---|---|---|
| 客户画像分析 | 中 | 本地优先 | Qwen2.5-7B (MacStudio) | 10-20s |
| 商机评分 | 高 | 本地优先 | DeepSeek-R1-32B (MacStudio) | 30-60s |
| **项目商机分析** | **极高** | **本地优先** | **DeepSeek-R1-32B (MacStudio)** | **60-120s** |
| 日报汇总 | 中 | 本地优先 | Qwen2.5-7B (MacStudio) | 15-30s |
| 流失预警 | 中 | 本地优先 | Qwen2.5-7B (MacStudio) | 10-20s |
| 报价建议 | 高 | 本地优先 | DeepSeek-R1-32B (MacStudio) | 30-60s |
| 合同风险 | 极高 | API备用 | DeepSeek API / Kimi API | 30-60s |
| 库存预警 | 低 | 本地优先 | Qwen2.5-7B (MacStudio) | 5-10s |
| 营销活动 | 中 | API备用 | MiniMax API / Kimi API | 10-20s |
| 报表解读 | 低 | 本地优先 | Qwen2.5-7B (MacStudio) | 10-20s |
# Routing preferences for the hybrid deployment: the local MacStudio is the
# first choice and the hosted APIs follow in the order listed here.
MODEL_ROUTING_CONFIG = {
    "primary": "macstudio_local",   # first choice: local MacStudio
    "fallback": "api_deepseek",     # second choice: DeepSeek API
    "tertiary": "api_minimax",      # third choice: MiniMax API
    "quaternary": "api_kimi",       # fourth choice: Kimi API
    "routing_rules": {
        # Models served locally while the MacStudio is healthy and not saturated.
        "local_available": {
            "condition": "macstudio.health == 'ok' AND macstudio.load < 0.8",
            "models": ["deepseek-r1-32b", "qwen2.5-7b"],
        },
        # NOTE(review): this rule switches to the APIs after 60s, while
        # "local_model" in "timeout_config" allows 120s — confirm which
        # threshold is intended.
        "api_fallback": {
            "condition": "local_available == False OR timeout > 60s",
            "models": ["deepseek-api", "minimax-api", "kimi-api"],
        },
    },
    "timeout_config": {
        "local_model": 120,  # local model timeout, in seconds
        "api_model": 60,     # API call timeout, in seconds
    },
}
---
| 配置项 | 规格 | 说明 |
|---|---|---|
| 芯片 | Apple M3 Ultra | 28核CPU + 80核GPU |
| 内存 | 128GB 统一内存 | 适合大模型加载 |
| 存储 | 1TB SSD | 模型文件存储 |
| 网络 | 千兆以太网 | 与阿里云服务器通信 |
# 1. Install Homebrew
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
# 2. Install Ollama (local model management)
brew install ollama
# 3. Install llama.cpp (high-performance inference)
brew install llama.cpp
# 4. Install Python and create an isolated environment.
#    Homebrew's Python is marked "externally managed" (PEP 668), so a bare
#    `pip3 install` into it is refused; install into a virtualenv instead.
brew install python@3.12
python3.12 -m venv "$HOME/macstudio-venv"
source "$HOME/macstudio-venv/bin/activate"
pip install torch torchvision torchaudio
# 5. Install the model-server dependencies.
#    `requests` is listed explicitly because the model server imports it.
pip install fastapi uvicorn requests transformers accelerate
# Start the Ollama server first: `ollama pull` talks to a running server, and
# a foreground `ollama serve` would block this shell, so run it as a service.
brew services start ollama
# Download the models with Ollama
ollama pull deepseek-r1:32b # DeepSeek-R1 32B (quantized)
ollama pull qwen2.5:72b # Qwen2.5 72B (quantized)
ollama pull qwen2.5:7b # Qwen2.5 7B (lightweight)
# Smoke-test a model; "stream": false returns one JSON object instead of a
# stream of partial responses.
curl http://localhost:11434/api/generate -d '{
"model": "deepseek-r1:32b",
"prompt": "分析这个商机:客户是宁波汽车部件厂,月需求50吨PCR-ABS",
"stream": false
}'
# macstudio_model_server.py
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import requests
import json
# FastAPI application exposing the model-server endpoints.
app = FastAPI(title="MacStudio Model Server")
# Base URL of the Ollama HTTP API on this machine.
OLLAMA_URL = "http://localhost:11434"
class ModelRequest(BaseModel):
    """Request body accepted by the chat-completion endpoint."""

    model: str = "deepseek-r1:32b"  # model tag forwarded to Ollama
    prompt: str  # prompt text forwarded to Ollama
    temperature: float = 0.7  # forwarded as the "temperature" option
    max_tokens: int = 2048  # forwarded as the "num_predict" option
@app.post("/v1/chat/completions")
async def chat_completion(request: ModelRequest):
    """Run one non-streaming generation on Ollama and wrap it OpenAI-style.

    The request is forwarded to Ollama's ``/api/generate`` endpoint and the
    generated text is returned inside a ``chat.completion``-shaped dict.

    Args:
        request: Model tag, prompt and sampling options.

    Returns:
        A dict with ``id``, ``object``, ``model`` and a single-element
        ``choices`` list whose message content is the generated text.

    Raises:
        HTTPException: 504 when Ollama does not answer in time, 502 when
            Ollama is unreachable or reports an error, 500 otherwise.
    """
    import asyncio  # local import so this block does not depend on file-level edits

    payload = {
        "model": request.model,
        "prompt": request.prompt,
        "stream": False,
        "options": {
            "temperature": request.temperature,
            "num_predict": request.max_tokens,
        },
    }

    def _generate():
        # Bounded wait: 5s to connect, 300s to generate, so a hung Ollama
        # cannot hold a worker thread forever.
        resp = requests.post(
            f"{OLLAMA_URL}/api/generate", json=payload, timeout=(5, 300)
        )
        resp.raise_for_status()
        return resp.json()

    try:
        # requests is blocking; running it in a worker thread keeps the event
        # loop free so /health still answers during a long generation.
        result = await asyncio.to_thread(_generate)
    except requests.Timeout as e:
        raise HTTPException(status_code=504, detail=str(e)) from e
    except requests.RequestException as e:
        raise HTTPException(status_code=502, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    # Surface an error payload instead of returning it as an empty completion.
    if isinstance(result, dict) and result.get("error"):
        raise HTTPException(status_code=502, detail=str(result["error"]))

    return {
        "id": "macstudio-local",
        "object": "chat.completion",
        "model": request.model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": result.get("response", ""),
            },
            "finish_reason": "stop",
        }],
    }
@app.get("/health")
async def health_check():
    """Report whether Ollama is reachable and which models it lists.

    Returns:
        ``{"status": "ok", "models": [...], "memory": ..., "gpu": ...}`` when
        Ollama's ``/api/tags`` endpoint answers, otherwise
        ``{"status": "error", "message": "Ollama not running"}``. Both cases
        are returned as a normal response; callers inspect ``status``.
    """
    import asyncio  # local import so this block does not depend on file-level edits

    def _list_models():
        # Short timeout so a hung Ollama cannot stall the health probe.
        resp = requests.get(f"{OLLAMA_URL}/api/tags", timeout=3)
        resp.raise_for_status()
        return resp.json().get("models", [])

    try:
        # Blocking HTTP call runs in a worker thread, off the event loop.
        models = await asyncio.to_thread(_list_models)
        names = [m["name"] for m in models]
    except Exception:
        # Best-effort probe: any failure means "not healthy". Unlike a bare
        # `except:`, this lets task cancellation and interpreter exit through.
        return {"status": "error", "message": "Ollama not running"}

    return {
        "status": "ok",
        "models": names,
        "memory": "128GB",
        "gpu": "M3 Ultra 80-core",
    }
# Script entry point: serve the app with uvicorn on port 11435.
if __name__ == "__main__":
    import uvicorn
    # Binds to every network interface.
    # NOTE(review): the endpoints shown here perform no authentication —
    # confirm that network access to this port is restricted.
    uvicorn.run(app, host="0.0.0.0", port=11435)
---
# backend/app/services/model_router.py
import requests
import asyncio
from typing import Optional, Dict, Any
class ModelRouter:
    """Route inference requests: local MacStudio first, hosted APIs as backup.

    All HTTP calls use the blocking ``requests`` library, so they are run in
    worker threads to keep the event loop responsive.
    """

    # Node id -> local model tag; unknown nodes use the "default" entry.
    _LOCAL_MODELS = {
        "A03": "deepseek-r1:32b",  # project opportunity analysis
        "A02": "deepseek-r1:32b",  # opportunity scoring
        "A04": "qwen2.5:7b",  # daily report summary
        "A05": "qwen2.5:7b",  # churn warning
        "default": "qwen2.5:7b",
    }

    # Complexity -> preferred hosted API; any other complexity uses MiniMax.
    _PREFERRED_API = {"extreme": "deepseek", "high": "kimi"}

    def __init__(self):
        """Load the local endpoint and the hosted-API settings.

        The endpoint and keys can be overridden through the environment
        (``MACSTUDIO_URL``, ``DEEPSEEK_API_KEY``, ``MINIMAX_API_KEY``,
        ``KIMI_API_KEY``); the previous hard-coded values stay as defaults.
        """
        import os  # local import so this block does not depend on file-level edits

        # NOTE(review): a ".local" name normally resolves only on the same
        # LAN; set MACSTUDIO_URL when this runs on a remote server.
        self.local_url = os.environ.get(
            "MACSTUDIO_URL", "http://macstudio.local:11435"
        )
        # NOTE(review): "model" replaces the placeholder "default" that was
        # sent before; confirm each id against the subscribed plan.
        self.api_configs = {
            "deepseek": {
                "url": "https://api.deepseek.com/v1/chat/completions",
                "key": os.environ.get("DEEPSEEK_API_KEY", "YOUR_DEEPSEEK_KEY"),
                "model": "deepseek-chat",
            },
            "minimax": {
                "url": "https://api.minimax.chat/v1/text/chatcompletion_v2",
                "key": os.environ.get("MINIMAX_API_KEY", "YOUR_MINIMAX_KEY"),
                "model": "MiniMax-Text-01",
            },
            "kimi": {
                "url": "https://api.moonshot.cn/v1/chat/completions",
                "key": os.environ.get("KIMI_API_KEY", "YOUR_KIMI_KEY"),
                "model": "moonshot-v1-8k",
            },
        }

    async def route_request(self, node_id: str, prompt: str, complexity: str = "high") -> Dict[str, Any]:
        """Send the prompt to the local model, falling back to a hosted API.

        The local model is tried for every complexity level whenever the
        MacStudio reports healthy; ``complexity`` only picks the backup API.

        Args:
            node_id: Agent node id, used to pick the local model.
            prompt: Prompt text.
            complexity: ``"extreme"``, ``"high"`` or anything else.

        Returns:
            ``{"source": ..., "result": ...}``. For the local path ``result``
            is the generated text; for the API path it is the provider's raw
            JSON response.
        """
        # 1. Local model first (MacStudio).
        if await self._check_local_health():
            try:
                result = await self._call_local_model(node_id, prompt)
                return {"source": "macstudio_local", "result": result}
            except Exception as e:
                print(f"Local model failed: {e}, falling back to API")
        # 2. Backup: hosted API.
        return await self._call_api_model(node_id, prompt, complexity)

    async def _check_local_health(self) -> bool:
        """Return True when the MacStudio health endpoint reports "ok"."""

        def _probe() -> bool:
            response = requests.get(f"{self.local_url}/health", timeout=5)
            return response.json().get("status") == "ok"

        try:
            return await asyncio.to_thread(_probe)
        except Exception:
            # Any failure means "unavailable"; unlike a bare `except:` this
            # does not swallow task cancellation.
            return False

    def _select_local_model(self, node_id: str) -> str:
        """Return the local model tag configured for ``node_id``."""
        return self._LOCAL_MODELS.get(node_id, self._LOCAL_MODELS["default"])

    async def _call_local_model(self, node_id: str, prompt: str) -> str:
        """Call the MacStudio model server and return the generated text.

        Raises:
            requests.RequestException: On transport errors or non-2xx status.
            KeyError / IndexError: If the response lacks the expected fields.
        """
        model = self._select_local_model(node_id)

        def _post() -> str:
            response = requests.post(f"{self.local_url}/v1/chat/completions", json={
                "model": model,
                "prompt": prompt,
                "temperature": 0.7,
                "max_tokens": 2048
            }, timeout=120)
            # Fail here on an error status rather than on a missing key below.
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]

        return await asyncio.to_thread(_post)

    def _api_order(self, complexity: str) -> list:
        """Return API names to try: the preferred one, then the others."""
        preferred = self._PREFERRED_API.get(complexity, "minimax")
        return [preferred] + [name for name in self.api_configs if name != preferred]

    def _post_chat(self, config: Dict[str, Any], prompt: str) -> Any:
        """POST one chat request to a hosted API and return its JSON body."""
        response = requests.post(config["url"], headers={
            "Authorization": f"Bearer {config['key']}",
            "Content-Type": "application/json"
        }, json={
            "model": config["model"],
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.7,
            "max_tokens": 2048
        }, timeout=60)
        response.raise_for_status()
        return response.json()

    async def _call_api_model(self, node_id: str, prompt: str, complexity: str) -> Dict[str, Any]:
        """Call a hosted API (backup path), trying the others if it fails.

        DeepSeek is preferred for ``"extreme"``, Kimi for ``"high"`` and
        MiniMax otherwise. ``node_id`` is accepted for interface
        compatibility and is not used.

        Returns:
            ``{"source": "api_<name>", "result": <raw JSON response>}``.

        Raises:
            Exception: The last provider's error when every API fails.
        """
        last_error = None
        for api_name in self._api_order(complexity):
            try:
                body = await asyncio.to_thread(
                    self._post_chat, self.api_configs[api_name], prompt
                )
            except Exception as e:
                print(f"API {api_name} failed: {e}")
                last_error = e
                continue
            return {"source": f"api_{api_name}", "result": body}
        raise last_error
# Module-level router instance, created when this module is imported.
model_router = ModelRouter()
# backend/config.py 更新
# Backend settings for the hybrid (local + API) deployment.
MODEL_CONFIG = {
    "deployment_mode": "hybrid",  # hybrid mode: local + API
    "local_endpoint": "http://macstudio.local:11435",  # MacStudio address
    "api_backup": True,  # API backup enabled
    "timeout": {
        "local": 120,  # local model timeout, in seconds
        "api": 60,     # API call timeout, in seconds
    },
    # Per-node model selection: local primary with an API fallback.
    "models": {
        "A03_project_analysis": {
            "primary": "deepseek-r1:32b",  # local, on the MacStudio
            "fallback": "deepseek-api",    # API backup
            "timeout": 120,
        },
        "A04_daily_report": {
            "primary": "qwen2.5:7b",  # local, on the MacStudio
            "fallback": "kimi-api",   # API backup
            "timeout": 60,
        },
    },
}
---
# 1. 配置静态IP(建议)
# 系统偏好设置 → 网络 → 以太网 → 手动
IP: 192.168.1.100
子网掩码: 255.255.255.0
路由器: 192.168.1.1
DNS: 8.8.8.8, 114.114.114.114
# 2. Allow inbound access through the macOS application firewall.
#    On Apple Silicon, Homebrew installs under /opt/homebrew rather than
#    /usr/local, so resolve the prefix instead of hard-coding the path.
OLLAMA_BIN="$(brew --prefix)/bin/ollama"
sudo /usr/libexec/ApplicationFirewall/socketfilterfw --add "$OLLAMA_BIN"
sudo /usr/libexec/ApplicationFirewall/socketfilterfw --unblockapp "$OLLAMA_BIN"
# NOTE(review): port 11435 is served by the Python model server, not by the
# ollama binary — confirm whether the Python executable must be allowed too.
# 3. 配置路由器端口转发
# 将外部端口 11435 转发到 192.168.1.100:11435
# 或使用内网穿透(frp/ngrok)
# frpc.ini (MacStudio side)
[common]
# Address and port of the frp server this client connects to
server_addr = 47.96.253.194
server_port = 7000
# Shared token; the same value appears in frps.ini
token = your_frp_token
# TCP proxy: the server's port 11435 is forwarded to port 11435 on this machine
# NOTE(review): confirm that access to the remote port is restricted, since
# the forwarded service is reachable on the server's address
[macstudio-ollama]
type = tcp
local_ip = 127.0.0.1
local_port = 11435
remote_port = 11435
# frps.ini (Alibaba Cloud server side)
[common]
# Port that frp clients connect to (server_port in frpc.ini)
bind_port = 7000
# Shared token; the same value appears in frpc.ini
token = your_frp_token
---
---
| 项目 | 费用 | 说明 |
|---|---|---|
| frp内网穿透 | 0元 | 开源免费 |
| 网络带宽 | 0元 | 复用现有 |
| MacStudio电力 | 约100元/月 | 估算 |
| **新增合计** | **100元/月** | |
| 项目 | 首年费用 | 次年费用 |
|---|---|---|
| 400电话硬件 | 2800元 | 0元 |
| 400电话月租 | 1200元 | 1200元 |
| 智能路由 | 600元 | 600元 |
| API调用(已包年) | 0元 | 0元 |
| MacStudio电力 | 1200元 | 1200元 |
| **总计** | **5800元** | **3000元** |
---
---
**文档版本**:v2.0(MacStudio + API混合架构)
**日期**:2026-06-15
**审批人**:admin@topcentral.cn / 麻一明
**执行状态**:待启动