Compute
Process images received via SMS by running OCR in a remote function.
A webhook-driven pipeline that receives inbound text messages (via Twilio), downloads the attached image, runs OCR to extract text, and replies with the result. The heavy lifting — downloading, OCR, and replying — happens in a remote function that scales independently from the webhook receiver.
This is a common pattern for SMS-based workflows: receipt scanning, document intake, ID verification, or any case where users send photos and expect a text reply.
The worker function receives a message payload, downloads the image from the MMS URL, runs OCR with Tesseract, and sends the extracted text back via Twilio.
Tesseract needs system packages, so we bake them into the image:
# ocr_worker.py
import json
import os
import chalkcompute
from chalkcompute import Container, Image, Secret, Volume
ocr_image = (
Image.base("python:3.12-slim")
.run_commands(
"apt-get update -qq && apt-get install -y -qq tesseract-ocr libtesseract-dev",
)
.pip_install(["pytesseract", "Pillow", "httpx", "twilio"])
)
ocr_container = Container(
image=ocr_image,
cpu="2",
memory="2Gi",
min_instances=0,
max_instances=8,
max_concurrent_requests=4,
secrets=[
Secret(name="TWILIO_ACCOUNT_SID"),
Secret(name="TWILIO_AUTH_TOKEN"),
Secret(name="TWILIO_PHONE_NUMBER"),
],
volumes={"ocr-archive": "/archive"},
)The volume at /archive stores every processed image and its extracted text — useful
for auditing or reprocessing later.
@chalkcompute.function(name="ocr-image", container=ocr_container)
def ocr_image(message_json: str) -> str:
"""Download an MMS image, run OCR, archive the result, and reply via SMS."""
import io
import httpx
import pytesseract
from PIL import Image as PILImage
from twilio.rest import Client
from datetime import datetime, timezone
message = json.loads(message_json)
image_url = message["image_url"]
from_number = message["from_number"]
message_sid = message["message_sid"]
# 1. Download the image from Twilio's media URL.
# Twilio media URLs require basic auth with your account credentials.
resp = httpx.get(
image_url,
auth=(os.environ["TWILIO_ACCOUNT_SID"], os.environ["TWILIO_AUTH_TOKEN"]),
follow_redirects=True,
timeout=30,
)
resp.raise_for_status()
img = PILImage.open(io.BytesIO(resp.content))
# 2. Run OCR.
extracted_text = pytesseract.image_to_string(img).strip()
if not extracted_text:
extracted_text = "(no text detected in image)"
# 3. Archive the image and result to the volume.
timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
archive_dir = f"/archive/{from_number}/{timestamp}_{message_sid}"
os.makedirs(archive_dir, exist_ok=True)
img.save(os.path.join(archive_dir, "image.png"))
with open(os.path.join(archive_dir, "text.txt"), "w") as f:
f.write(extracted_text)
# 4. Reply via SMS with the extracted text.
twilio = Client(
os.environ["TWILIO_ACCOUNT_SID"],
os.environ["TWILIO_AUTH_TOKEN"],
)
# Truncate to SMS-friendly length.
reply_body = extracted_text[:1500]
if len(extracted_text) > 1500:
reply_body += "\n\n(truncated)"
twilio.messages.create(
body=f"Extracted text:\n\n{reply_body}",
from_=os.environ["TWILIO_PHONE_NUMBER"],
to=from_number,
)
return f"{message_sid}:ok:{len(extracted_text)} chars"Deploy the worker:
chalk compute deploy ocr_worker.py
# Deployed function "ocr-image" on container ...

Twilio sends an HTTP POST to your webhook URL whenever a text message arrives. The receiver parses the Twilio payload, checks for an image attachment, and dispatches the OCR function.
# webhook.py
import json
import logging
from fastapi import FastAPI, Form
from fastapi.responses import Response
import chalkcompute
# Configure logging once at import time so the webhook emits INFO-level logs.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
app = FastAPI()
# Handle to the remote "ocr-image" function deployed from ocr_worker.py.
ocr_fn = chalkcompute.function_ref("ocr-image")
@app.on_event("startup")
def on_startup() -> None:
    # Block startup until the remote OCR function is reachable, so the first
    # inbound webhook doesn't race a cold worker.
    # NOTE(review): @app.on_event is deprecated in newer FastAPI releases in
    # favor of lifespan handlers — confirm the targeted FastAPI version.
    ocr_fn.wait_ready()
    log.info("OCR worker ready")
@app.post("/sms")
async def handle_sms(
    From: str = Form(...),
    Body: str = Form(""),
    NumMedia: str = Form("0"),
    MessageSid: str = Form(""),
    MediaUrl0: str = Form(None),
    MediaContentType0: str = Form(None),
) -> Response:
    """Handle an inbound Twilio SMS/MMS webhook.

    Returns TwiML immediately; the OCR and the SMS reply happen asynchronously
    in the "ocr-image" worker, so Twilio never sees a slow response.
    """

    def _twiml(message: str = "") -> Response:
        # Minimal TwiML document; an empty message yields empty <Response>.
        inner = f"<Message>{message}</Message>" if message else ""
        return Response(
            content=(
                '<?xml version="1.0" encoding="UTF-8"?>'
                f"<Response>{inner}</Response>"
            ),
            media_type="application/xml",
        )

    # Defensive parse: Twilio sends NumMedia as a decimal string, but a
    # malformed request shouldn't turn into a 500 from int().
    try:
        num_media = int(NumMedia)
    except ValueError:
        num_media = 0

    if num_media == 0 or MediaUrl0 is None:
        # No image attached — return a TwiML response.
        return _twiml("Send me a photo and I'll extract the text from it.")

    # MediaContentType0 defaults to None and can be absent even when media is
    # present; calling .startswith() on None would raise AttributeError.
    if not (MediaContentType0 or "").startswith("image/"):
        return _twiml("I can only process images, not other file types.")

    # Dispatch to the OCR worker — don't block the webhook response.
    payload = json.dumps({
        "image_url": MediaUrl0,
        "from_number": From,
        "message_sid": MessageSid,
        "body": Body,
    })
    ocr_fn.async_call(payload)
    log.info("Dispatched OCR for message %s from %s", MessageSid, From)

    # Return empty TwiML — the worker will send the reply directly.
    return _twiml()
@app.get("/health")
def health() -> dict:
return {"status": "ok"}The webhook returns immediately with empty TwiML. The OCR worker handles the reply asynchronously via the Twilio API, so users don’t see a timeout even if OCR takes several seconds.
# deploy_webhook.py
from chalkcompute import Container, Image, Secret
# Lightweight receiver image: FastAPI + uvicorn, plus python-multipart so
# FastAPI can parse Twilio's form-encoded webhook POSTs.
image = (
    Image.base("python:3.12-slim")
    .pip_install(["fastapi", "uvicorn", "chalkcompute", "python-multipart"])
    .add_local_file("webhook.py", "/app/webhook.py")
)

container = Container(
    image=image,
    name="sms-ocr-webhook",
    port=8000,
    # The receiver only parses forms and dispatches — it needs far fewer
    # resources than the OCR workers.
    cpu="0.5",
    memory="512Mi",
    secrets=[
        # NOTE(review): webhook.py as shown never reads this token —
        # presumably intended for Twilio request-signature validation; confirm.
        Secret(name="TWILIO_AUTH_TOKEN"),
    ],
    entrypoint=[
        "uvicorn", "webhook:app",
        "--host", "0.0.0.0",
        "--port", "8000",
        "--app-dir", "/app",
    ],
).run()

print(f"Webhook URL: {container.info.web_url}/sms")
print("Set this as your Twilio phone number's webhook URL.")

chalk compute deploy deploy_webhook.py
# Deployed container ...
# Webhook URL: https://xxxx.compute.chalk.ai/sms

Copy the /sms URL into your Twilio phone number’s messaging webhook configuration.
User's phone Twilio Webhook Container OCR Workers
│ │ │ │
│ MMS (photo) │ │ │
│──────────────────▸│ │ │
│ │ POST /sms │ │
│ │ (From, MediaUrl0) │ │
│ │──────────────────────▸│ │
│ │ │ │
│ │ empty TwiML │ │
│ │◂──────────────────────│ │
│ │ │ │
│ │ │ async_call(msg) │
│ │ │───────────────────▸│
│ │ │ │
│ │ │ │── download image
│ │ │ │── tesseract OCR
│ │ │ │── archive to Volume
│ │ │ │
│ │ twilio.messages.create(reply) │
│ │◂───────────────────────────────────────────│
│ │ │ │
│ SMS reply with │ │ │
│ extracted text │ │ │
│◂──────────────────│ │ │
│ │ │ │
Every processed image is saved to the volume, organized by phone number and timestamp. You can browse or reprocess them from any script:
from chalkcompute import Volume

# Attach to the same volume the OCR workers mount at /archive.
vol = Volume(name="ocr-archive")

# List recent OCR results for a number (entries are keyed by sender, then
# by "<timestamp>_<message_sid>" — see the worker's archive layout).
for entry in vol.listdir("+15551234567/"):
    print(entry.path)

# Read extracted text from a specific run. read_file returns bytes; the
# worker wrote plain text, so decode before printing.
text = vol.read_file("+15551234567/20260410_143022_SM1234abc/text.txt")
print(text.decode())