Polling Best Practices
Learn how to efficiently poll for job completion without wasting resources or hitting rate limits.
Why Polling?
Polling is the simplest way to wait for job completion:
- No server setup required (unlike webhooks)
- Works in any environment
- Easy to implement and debug
Basic Polling
The simplest polling implementation:
- Python
- Node.js
import time
import requests
def poll_job(job_id: str, api_key: str) -> dict:
    """Block until the job reaches a terminal status, checking every 5 seconds.

    Returns the job payload as soon as its "status" is "done" or "failed".
    """
    job_url = f"https://api.knowhereto.ai/v1/jobs/{job_id}"
    auth_headers = {"Authorization": f"Bearer {api_key}"}
    terminal_states = ("done", "failed")

    while True:
        job = requests.get(job_url, headers=auth_headers).json()
        if job["status"] in terminal_states:
            return job
        # Fixed pause between polls: 5 seconds.
        time.sleep(5)
/**
 * Resolve with the job payload once its status is 'done' or 'failed',
 * checking the job endpoint every 5 seconds.
 */
async function pollJob(jobId, apiKey) {
  const jobUrl = `https://api.knowhereto.ai/v1/jobs/${jobId}`;
  const requestOptions = { headers: { 'Authorization': `Bearer ${apiKey}` } };
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  for (;;) {
    const reply = await fetch(jobUrl, requestOptions);
    const job = await reply.json();
    if (job.status === 'done' || job.status === 'failed') {
      return job;
    }
    // Fixed pause between polls: 5 seconds.
    await pause(5000);
  }
}
Problem: This polls at a fixed rate, which may be too fast for long jobs or too slow for quick ones.
Exponential Backoff
Better approach: start with short intervals and gradually increase:
- Python
- Node.js
import time
import requests
def poll_with_backoff(
    job_id: str,
    api_key: str,
    initial_delay: float = 2.0,
    max_delay: float = 10.0,
    backoff_factor: float = 1.5,
    timeout: float = 300.0
) -> dict:
    """Poll a job with exponential backoff until it is done or `timeout` elapses.

    Args:
        job_id: Identifier of the job to poll.
        api_key: API key sent as a Bearer token.
        initial_delay: Seconds to wait after the first poll.
        max_delay: Upper bound, in seconds, for the wait between polls.
        backoff_factor: Multiplier applied to the delay after every poll.
        timeout: Overall time budget in seconds.

    Returns:
        The job payload once its status is "done".

    Raises:
        Exception: If the job reports status "failed".
        TimeoutError: If the job is not done within `timeout` seconds.
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    url = f"https://api.knowhereto.ai/v1/jobs/{job_id}"
    headers = {"Authorization": f"Bearer {api_key}"}
    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout
    delay = initial_delay

    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break

        try:
            # Bound every request so a stalled connection cannot outlive the
            # overall budget; 30 s is an arbitrary per-request ceiling.
            response = requests.get(
                url, headers=headers, timeout=min(remaining, 30.0)
            )
        except requests.exceptions.Timeout:
            # Treat a timed-out request as a missed poll; the deadline check
            # at the top of the loop decides whether to try again.
            continue

        # Surface HTTP errors (401, 404, 5xx, ...) instead of failing later
        # on a missing "status" key.
        response.raise_for_status()
        job = response.json()
        status = job["status"]

        if status == "done":
            return job
        if status == "failed":
            # The error payload may be absent or not a dict; never let that
            # mask the failure itself.
            error = job.get("error")
            message = error.get("message") if isinstance(error, dict) else error
            raise Exception(f"Job failed: {message or 'unknown error'}")

        # Log progress
        progress = job.get("progress")
        if progress:
            print(
                f"[{status}] {progress['processed_pages']}/"
                f"{progress['total_pages']} pages"
            )
        else:
            print(f"[{status}] Processing...")

        # Wait with backoff, but never sleep past the deadline.
        time.sleep(max(0.0, min(delay, deadline - time.monotonic())))
        delay = min(delay * backoff_factor, max_delay)

    raise TimeoutError(f"Job did not complete within {timeout} seconds")
/**
 * Poll a job with exponential backoff until it is done or `timeout` elapses.
 *
 * @param {string} jobId - Identifier of the job to poll.
 * @param {string} apiKey - API key sent as a Bearer token.
 * @param {object} [options]
 * @param {number} [options.initialDelay=2000] - First wait between polls (ms).
 * @param {number} [options.maxDelay=10000] - Upper bound for the wait (ms).
 * @param {number} [options.backoffFactor=1.5] - Multiplier applied per poll.
 * @param {number} [options.timeout=300000] - Overall time budget (ms).
 * @returns {Promise<object>} The job payload once its status is 'done'.
 * @throws {Error} If the request fails, the job fails, or the budget runs out.
 */
async function pollWithBackoff(
  jobId,
  apiKey,
  {
    initialDelay = 2000,
    maxDelay = 10000,
    backoffFactor = 1.5,
    timeout = 300000
  } = {}
) {
  const url = `https://api.knowhereto.ai/v1/jobs/${jobId}`;
  const headers = { 'Authorization': `Bearer ${apiKey}` };
  const deadline = Date.now() + timeout;
  let delay = initialDelay;

  while (Date.now() < deadline) {
    const response = await fetch(url, { headers });

    // Without this check an error body (401, 404, ...) has no `status`
    // field and would be polled until the timeout expires.
    if (!response.ok) {
      throw new Error(`Polling request failed with HTTP ${response.status}`);
    }

    const job = await response.json();

    if (job.status === 'done') {
      return job;
    }
    if (job.status === 'failed') {
      // The error payload may be missing; never let that mask the failure.
      const message = (job.error && job.error.message) || 'unknown error';
      throw new Error(`Job failed: ${message}`);
    }

    // Log progress
    if (job.progress) {
      console.log(
        `[${job.status}] ${job.progress.processed_pages}/${job.progress.total_pages} pages`
      );
    } else {
      console.log(`[${job.status}] Processing...`);
    }

    // Wait with backoff, but never sleep past the deadline.
    const wait = Math.max(0, Math.min(delay, deadline - Date.now()));
    await new Promise(r => setTimeout(r, wait));
    delay = Math.min(delay * backoffFactor, maxDelay);
  }

  throw new Error(`Job did not complete within ${timeout}ms`);
}
Recommended Polling Strategy
Based on typical processing times:
| Document Size | Initial Delay | Max Delay | Expected Time |
|---|---|---|---|
| Small (1-10 pages) | 2s | 5s | 10-30s |
| Medium (11-50 pages) | 3s | 10s | 30-120s |
| Large (51+ pages) | 5s | 15s | 2-5 min |
- Python
- Node.js
def smart_poll(job_id: str, api_key: str, estimated_pages: int = 10) -> dict:
    """Choose backoff settings from the estimated page count, then poll.

    Documents of up to 10 pages use the shortest delays and timeout, up to
    50 pages the middle tier, and anything larger the longest settings.
    """
    # (page ceiling, initial_delay, max_delay, timeout), smallest tier first.
    tiers = (
        (10, 2.0, 5.0, 60.0),
        (50, 3.0, 10.0, 180.0),
    )

    # Settings for documents larger than every ceiling above.
    first_wait, wait_cap, budget = 5.0, 15.0, 600.0
    for page_ceiling, tier_wait, tier_cap, tier_budget in tiers:
        if estimated_pages <= page_ceiling:
            first_wait, wait_cap, budget = tier_wait, tier_cap, tier_budget
            break

    return poll_with_backoff(
        job_id, api_key,
        initial_delay=first_wait,
        max_delay=wait_cap,
        timeout=budget
    )
/**
 * Choose backoff settings from the estimated page count, then poll.
 * Up to 10 pages uses the shortest settings, up to 50 pages the middle
 * tier, and anything larger the longest delays and timeout.
 */
function smartPoll(jobId, apiKey, estimatedPages = 10) {
  // Tiers are checked in order; the first ceiling that fits wins.
  const tiers = [
    { ceiling: 10, settings: { initialDelay: 2000, maxDelay: 5000, timeout: 60000 } },
    { ceiling: 50, settings: { initialDelay: 3000, maxDelay: 10000, timeout: 180000 } }
  ];
  const largeSettings = { initialDelay: 5000, maxDelay: 15000, timeout: 600000 };

  const match = tiers.find((tier) => estimatedPages <= tier.ceiling);
  const settings = match ? match.settings : largeSettings;

  return pollWithBackoff(jobId, apiKey, settings);
}
Handling Rate Limits
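The API allows 60 requests per minute. If you hit the limit, the API responds with HTTP 429; wait for the interval given in the Retry-After header before polling again: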
- Python
- Node.js
def _retry_after_seconds(header_value, default: float = 60.0) -> float:
    """Convert a Retry-After header value into a non-negative wait in seconds.

    Accepts both forms the header may take: a number of seconds or an
    HTTP-date. Returns `default` when the header is missing or unparseable.
    """
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    if header_value is None:
        return default
    try:
        # Clamp at zero: time.sleep() rejects negative values.
        return float(max(0, int(header_value)))
    except ValueError:
        pass  # Not a plain number of seconds; try the HTTP-date form.
    try:
        retry_at = parsedate_to_datetime(header_value)
    except (TypeError, ValueError):
        return default
    if retry_at.tzinfo is None:
        # A date parsed without zone information is treated as UTC.
        retry_at = retry_at.replace(tzinfo=timezone.utc)
    return max(0.0, (retry_at - datetime.now(timezone.utc)).total_seconds())


def poll_with_rate_limit_handling(job_id: str, api_key: str) -> dict:
    """Poll until the job is done or failed, backing off when rate limited.

    On HTTP 429 the function waits for the interval announced in the
    Retry-After header (60 seconds if absent or unreadable) and retries.
    Otherwise it polls with a delay that grows from 2 to 10 seconds and is
    raised to at least 5 seconds when fewer than 10 requests remain.

    Returns:
        The job payload once its status is "done" or "failed".
    """
    url = f"https://api.knowhereto.ai/v1/jobs/{job_id}"
    headers = {"Authorization": f"Bearer {api_key}"}
    delay = 2.0

    while True:
        response = requests.get(url, headers=headers)

        # Check for rate limiting
        if response.status_code == 429:
            retry_after = _retry_after_seconds(response.headers.get("Retry-After"))
            print(f"Rate limited. Waiting {retry_after:.0f} seconds...")
            time.sleep(retry_after)
            continue

        job = response.json()
        if job["status"] in ["done", "failed"]:
            return job

        # Use rate limit headers to adjust polling; an unreadable header is
        # treated like a missing one (full quota assumed).
        try:
            remaining = int(response.headers.get("RateLimit-Remaining", 60))
        except ValueError:
            remaining = 60
        if remaining < 10:
            delay = max(delay, 5.0)  # Slow down if running low

        time.sleep(delay)
        delay = min(delay * 1.5, 10.0)
/**
 * Convert a Retry-After header value into a non-negative wait in milliseconds.
 * Accepts a number of seconds or an HTTP-date; falls back to `fallbackMs`
 * when the header is missing or unparseable.
 */
function retryAfterToMs(headerValue, fallbackMs = 60000) {
  const raw = (headerValue || '').trim();
  if (raw === '') {
    return fallbackMs;
  }
  const seconds = Number(raw);
  if (Number.isFinite(seconds)) {
    return Math.max(0, seconds * 1000);
  }
  const retryAt = Date.parse(raw);
  if (!Number.isNaN(retryAt)) {
    return Math.max(0, retryAt - Date.now());
  }
  return fallbackMs;
}

/**
 * Poll until the job is done or failed, backing off when rate limited.
 * On HTTP 429 it waits for the Retry-After interval (60 s if absent or
 * unreadable) and retries; otherwise the delay grows from 2 s to 10 s and is
 * raised to at least 5 s when fewer than 10 requests remain.
 */
async function pollWithRateLimitHandling(jobId, apiKey) {
  const url = `https://api.knowhereto.ai/v1/jobs/${jobId}`;
  const headers = { 'Authorization': `Bearer ${apiKey}` };
  let delay = 2000;

  while (true) {
    const response = await fetch(url, { headers });

    // Check for rate limiting
    if (response.status === 429) {
      // A NaN wait would make setTimeout fire immediately and hammer the
      // API, so the header is parsed defensively.
      const waitMs = retryAfterToMs(response.headers.get('Retry-After'));
      console.log(`Rate limited. Waiting ${Math.ceil(waitMs / 1000)} seconds...`);
      await new Promise(r => setTimeout(r, waitMs));
      continue;
    }

    const job = await response.json();
    if (['done', 'failed'].includes(job.status)) {
      return job;
    }

    // Use rate limit headers to adjust polling; an unreadable header is
    // treated like a missing one (full quota assumed).
    const parsed = Number.parseInt(response.headers.get('RateLimit-Remaining') || '60', 10);
    const remaining = Number.isNaN(parsed) ? 60 : parsed;
    if (remaining < 10) {
      delay = Math.max(delay, 5000); // Slow down if running low
    }

    await new Promise(r => setTimeout(r, delay));
    delay = Math.min(delay * 1.5, 10000);
  }
}
Progress Tracking
For user-facing applications, show progress:
from tqdm import tqdm
def poll_with_progress_bar(job_id: str, api_key: str) -> dict:
    """Poll a job while rendering its page progress as a tqdm progress bar.

    The bar is created lazily on the first response that carries a
    "progress" entry and is always closed before the function exits.

    Returns:
        The job payload once its status is "done".

    Raises:
        Exception: If the job reports status "failed".
    """
    url = f"https://api.knowhereto.ai/v1/jobs/{job_id}"
    headers = {"Authorization": f"Bearer {api_key}"}
    pbar = None
    delay = 2.0

    try:
        while True:
            response = requests.get(url, headers=headers)
            job = response.json()

            if job["status"] == "done":
                return job
            elif job["status"] == "failed":
                raise Exception(f"Job failed: {job['error']['message']}")

            # Update progress bar
            if "progress" in job:
                p = job["progress"]
                if pbar is None:
                    pbar = tqdm(total=p["total_pages"], desc="Parsing")
                pbar.n = p["processed_pages"]
                pbar.refresh()

            time.sleep(delay)
            delay = min(delay * 1.5, 10.0)
    finally:
        # Single cleanup point for return and raise alike. Compare against
        # None explicitly: a tqdm bar's truthiness depends on its total, so
        # `if pbar:` could skip closing a bar whose total is 0.
        if pbar is not None:
            pbar.close()
Parallel Job Polling
When processing multiple documents, poll all jobs efficiently:
import asyncio
import aiohttp
async def poll_job_async(
    session: aiohttp.ClientSession,
    job_id: str,
    api_key: str
) -> dict:
    """Poll one job on a shared session until it is done or failed.

    The wait between polls starts at 2 seconds and grows by 1.5x per poll,
    capped at 10 seconds.
    """
    job_url = f"https://api.knowhereto.ai/v1/jobs/{job_id}"
    auth_headers = {"Authorization": f"Bearer {api_key}"}
    terminal_states = ("done", "failed")
    wait = 2.0

    while True:
        async with session.get(job_url, headers=auth_headers) as reply:
            payload = await reply.json()

        if payload["status"] in terminal_states:
            return payload

        await asyncio.sleep(wait)
        wait = min(wait * 1.5, 10.0)
async def poll_multiple_jobs(job_ids: list, api_key: str) -> list:
    """Poll every job in `job_ids` concurrently over one shared session.

    Returns the final job payloads in the same order as `job_ids`.
    """
    async with aiohttp.ClientSession() as session:
        pollers = (poll_job_async(session, jid, api_key) for jid in job_ids)
        return await asyncio.gather(*pollers)
# Usage
# NOTE: `api_key` is not defined in this snippet; assign your API key to it first.
job_ids = ["job_1", "job_2", "job_3"]
# asyncio.run drives the coroutine to completion and returns the gathered
# job payloads, one per entry in job_ids.
results = asyncio.run(poll_multiple_jobs(job_ids, api_key))
Best Practices Summary
- Start fast, slow down later: Begin with a 2s delay and increase it while the job is still running
- Use exponential backoff: Don't hammer the API with constant requests
- Set timeouts: Always have a maximum wait time
- Handle rate limits: Check for 429 responses and respect Retry-After
- Show progress: Use progress information for better UX
- Log appropriately: Log status changes, not every poll
Next Steps
- Error Handling - Handle polling failures
- Job Lifecycle - Understand all job states