How to turn a local model into an API service?

Sometimes an AI project requires an API key and a base URL, and you cannot modify its source code to switch to a different provider. In that case, you can run a local API endpoint for the project to talk to instead. You can also deploy a model on your own remote server and expose it through an API endpoint.

Below is an example in which I expose an embedding model as an API endpoint that follows the OpenAI API convention.

1. Setup

pip install fastapi uvicorn sentence-transformers pydantic

2. Python Code

import secrets
from typing import List, Union

from fastapi import Depends, FastAPI, HTTPException, status
from fastapi.security import APIKeyHeader
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer

# FastAPI application that serves the embedding endpoint.
app = FastAPI()
# Name of the HTTP header from which the client's API key is read.
API_KEY_NAME = "Authorization"
# Per the FastAPI docs, auto_error=False passes a missing header on as None
# instead of rejecting the request automatically, so get_api_key decides how
# to respond.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# Embedding model, loaded once at import time and shared by every request.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Request body accepted by POST /v1/embeddings.
class EmbeddingRequest(BaseModel):
    # A single text, or a list of texts, to embed.
    input: Union[str, List[str]]
    # Model name supplied by the client. The handler only echoes it back in
    # the response; the embeddings always come from the module-level model.
    model: str = "text-embedding-ada-002"

# Response body returned by POST /v1/embeddings.
class EmbeddingResponse(BaseModel):
    # Type tag of the payload; defaults to "list".
    object: str = "list"
    # One dict per input text, with "object", "embedding" and "index" keys
    # as built by create_embedding.
    data: List[dict]
    # Model name echoed from the request.
    model: str
    # Token accounting with "prompt_tokens" and "total_tokens" keys.
    usage: dict

def get_embedding(text: str) -> List[float]:
    """Encode ``text`` with the module-level model and return the vector as a plain list."""
    return model.encode(text).tolist()

async def get_api_key(api_key: str = Depends(api_key_header)):
    """Validate the API key sent in the ``Authorization`` header.

    Both a bare key and the OpenAI-style ``Bearer <key>`` form are accepted,
    so standard OpenAI clients can authenticate against this server.

    Args:
        api_key: Raw header value extracted by ``api_key_header``; ``None``
            when the header is absent.

    Returns:
        The validated key, with any ``Bearer`` prefix removed, so routes that
        depend on this function receive a usable value.

    Raises:
        HTTPException: 401 when the key is missing or does not match.
    """
    expected_key = "your_api_key_here"  # placeholder: replace with your own secret

    supplied_key = api_key or ""
    scheme, _, credentials = supplied_key.partition(" ")
    if credentials and scheme.lower() == "bearer":
        supplied_key = credentials.strip()

    # Compare as bytes: compare_digest rejects non-ASCII str, and comparing in
    # constant time avoids leaking how many leading characters matched.
    keys_match = secrets.compare_digest(
        supplied_key.encode("utf-8"), expected_key.encode("utf-8")
    )
    if not keys_match:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid API Key",
        )
    return supplied_key

@app.post("/v1/embeddings")
async def create_embedding(request: EmbeddingRequest, api_key: str = Depends(get_api_key)):
    """Embed the request's input text(s) and return an OpenAI-style response.

    Args:
        request: Parsed request body; ``input`` may be one string or a list.
        api_key: Result of the ``get_api_key`` dependency. It is declared so
            that authentication runs; the value itself is not used here.

    Returns:
        An ``EmbeddingResponse`` with one entry per input text, in input order.

    Raises:
        HTTPException: 500 carrying the error message if embedding fails.
    """
    try:
        # Normalise to a list so a single string and a batch share one path.
        texts = [request.input] if isinstance(request.input, str) else request.input

        # NOTE: get_embedding is a synchronous call, so it occupies the event
        # loop for as long as the model takes to encode each text.
        embeddings = [get_embedding(text) for text in texts]

        # Whitespace-separated word count, used as a rough stand-in for a
        # real tokenizer's token count.
        token_count = sum(len(text.split()) for text in texts)

        return EmbeddingResponse(
            data=[
                {
                    "object": "embedding",
                    "embedding": embedding,
                    "index": i,
                }
                for i, embedding in enumerate(embeddings)
            ],
            model=request.model,
            usage={
                "prompt_tokens": token_count,
                "total_tokens": token_count,
            },
        )
    except Exception as e:
        # Report any failure as a 500 while keeping the original traceback.
        raise HTTPException(status_code=500, detail=str(e)) from e

# Start the server only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # Imported here so uvicorn is only required when launching this way.
    import uvicorn
    # 0.0.0.0 listens on all network interfaces; port 8000 matches the curl example.
    uvicorn.run(app, host="0.0.0.0", port=8000)

3. Test the API

curl -X POST "http://localhost:8000/v1/embeddings" \
-H "Content-Type: application/json" \
-H "Authorization: your_api_key_here" \
-d '{"input": "Hello, world!", "model": "text-embedding-ada-002"}'

That's it! Try it out yourself.