How to deploy a local model as an API endpoint
How to turn a local model into an API service?
Sometimes an AI project requires an API key and a base URL, and you cannot modify its source code to change the provider. In that case, you can serve a model from a local API endpoint instead. You can also deploy the model on a remote server and expose it through an API endpoint.
Below is an example in which I expose an embedding model as an API endpoint that follows the OpenAI API convention.
1. Setup
pip install fastapi uvicorn sentence-transformers pydantic
2. Python Code
from typing import List, Union

from fastapi import Depends, FastAPI, HTTPException, status
from fastapi.security import APIKeyHeader
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
# The FastAPI application object that the embeddings route is registered on.
app = FastAPI()
# Name of the HTTP request header that carries the caller's API key.
API_KEY_NAME = "Authorization"
# Header extractor used as a dependency by get_api_key. With auto_error=False
# a missing header is expected to reach the dependency as None instead of
# being rejected by FastAPI itself (see the FastAPI APIKeyHeader docs).
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
# Loaded once at module level so every request reuses the same model instance.
model = SentenceTransformer('all-MiniLM-L6-v2')
class EmbeddingRequest(BaseModel):
    """Request body accepted by ``POST /v1/embeddings``."""

    # A single text, or a list of texts, to embed.
    input: Union[str, List[str]]
    # Model name supplied by the client. It is only echoed back in the
    # response; the model that actually runs is the one loaded at startup.
    model: str = "text-embedding-ada-002"
class EmbeddingResponse(BaseModel):
    """Response body returned by ``POST /v1/embeddings``."""

    # Type tag of the top-level payload; always "list" unless overridden.
    object: str = "list"
    # One dict per input text, with "object", "embedding" and "index" keys.
    data: List[dict]
    # Model name copied from the request.
    model: str
    # Token accounting, with "prompt_tokens" and "total_tokens" keys.
    usage: dict
def get_embedding(text: str) -> List[float]:
    """Embed a single text with the module-level model.

    Args:
        text: The text to encode.

    Returns:
        The embedding as a plain Python list, so that it can be placed
        directly into a JSON response.
    """
    # tolist() converts the array returned by encode() into built-in types.
    return model.encode(text).tolist()
async def get_api_key(api_key: str = Depends(api_key_header)):
    """Validate the API key sent with the request.

    Both the bare key and the ``Bearer <key>`` form are accepted, because
    OpenAI-compatible clients send the key as a Bearer token.

    Args:
        api_key: Raw value of the API-key header, or None when the header
            is absent.

    Returns:
        The validated key, without any ``Bearer`` prefix.

    Raises:
        HTTPException: 401 when the key is missing or does not match.
    """
    import secrets

    # Placeholder secret: replace it before exposing the server.
    expected_key = "your_api_key_here"

    presented = api_key or ""
    scheme, _, credentials = presented.partition(" ")
    if scheme.lower() == "bearer":
        presented = credentials.strip()

    # Compare in constant time. Bytes are used because compare_digest
    # rejects str values that contain non-ASCII characters.
    keys_match = secrets.compare_digest(
        presented.encode("utf-8"), expected_key.encode("utf-8")
    )
    if not keys_match:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid API Key",
        )
    return presented
@app.post("/v1/embeddings")
async def create_embedding(request: EmbeddingRequest, api_key: str = Depends(get_api_key)):
    """Embed the requested text(s) and return them in an OpenAI-style payload.

    Args:
        request: Parsed request body; ``input`` may be one string or a list
            of strings.
        api_key: Result of the ``get_api_key`` dependency. It is not used in
            the body; declaring it is what enforces authentication.

    Returns:
        An ``EmbeddingResponse`` with one entry per input text, in input
        order.

    Raises:
        HTTPException: 500 with the error text when embedding fails.
    """
    try:
        # Normalise the input so the rest of the handler always sees a list.
        texts = [request.input] if isinstance(request.input, str) else request.input
        embeddings = [get_embedding(text) for text in texts]

        # A whitespace-separated word count stands in for a real tokenizer
        # count; it is computed once and reported for both usage fields.
        token_count = sum(len(text.split()) for text in texts)

        return EmbeddingResponse(
            data=[
                {
                    "object": "embedding",
                    "embedding": embedding,
                    "index": i,
                }
                for i, embedding in enumerate(embeddings)
            ],
            model=request.model,
            usage={
                "prompt_tokens": token_count,
                "total_tokens": token_count,
            },
        )
    except Exception as e:
        # Top-level boundary: report any failure as a 500, keeping the cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run as a script.
    import uvicorn

    # host="0.0.0.0" listens on every network interface, so the endpoint is
    # reachable from other machines; use "127.0.0.1" for local-only access.
    uvicorn.run(app, host="0.0.0.0", port=8000)
3. Test the API
# The Authorization header must carry the API key configured in the server code.
curl -X POST "http://localhost:8000/v1/embeddings" \
  -H "Content-Type: application/json" \
  -H "Authorization: your_api_key_here" \
  -d '{"input": "Hello, world!", "model": "text-embedding-ada-002"}'
Now it is done! Try it out yourself.