Quick Start
Parse your first document with Knowhere API in under 5 minutes.
Prerequisites
- A Knowhere API key (get one from your Knowhere account)
- A document to parse (PDF, DOCX, XLSX, or PPTX)
Step 1: Set Your API Key
# Export the key so the examples below can read it from the environment
export KNOWHERE_API_KEY="your_api_key_here"
Step 2: Create a Parsing Job
You have two options for submitting documents:
Option A: Parse from URL
If your document is publicly accessible:
- cURL
- Python
- Node.js
# Create a parsing job that points at a publicly accessible document URL
curl -X POST https://api.knowhereto.ai/v1/jobs \
-H "Authorization: Bearer $KNOWHERE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"source_type": "url",
"source_url": "https://example.com/document.pdf"
}'
import os

import requests

# Read the key exported in Step 1; KeyError here means the env var is unset.
KNOWHERE_API_KEY = os.environ["KNOWHERE_API_KEY"]

# Create a parsing job that points at a publicly accessible document URL.
response = requests.post(
    "https://api.knowhereto.ai/v1/jobs",
    headers={
        "Authorization": f"Bearer {KNOWHERE_API_KEY}",
        "Content-Type": "application/json",
    },
    json={
        "source_type": "url",
        "source_url": "https://example.com/document.pdf",
    },
)
response.raise_for_status()  # surface HTTP errors instead of parsing an error body

job = response.json()
print(f"Job ID: {job['job_id']}")
// Read the key exported in Step 1.
const KNOWHERE_API_KEY = process.env.KNOWHERE_API_KEY;

// Create a parsing job that points at a publicly accessible document URL.
const response = await fetch('https://api.knowhereto.ai/v1/jobs', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${KNOWHERE_API_KEY}`,
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    source_type: 'url',
    source_url: 'https://example.com/document.pdf'
  })
});
if (!response.ok) {
  // Fail fast on HTTP errors instead of reading job_id from an error body.
  throw new Error(`Job creation failed: ${response.status}`);
}
const job = await response.json();
console.log(`Job ID: ${job.job_id}`);
Option B: Upload a Local File
For local files, first create a job to get an upload URL:
- cURL
- Python
- Node.js
# Step 1: Create job and get upload URL
curl -X POST https://api.knowhereto.ai/v1/jobs \
-H "Authorization: Bearer $KNOWHERE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"source_type": "file",
"file_name": "document.pdf"
}'
# Response includes upload_url and upload_headers
# Step 2: Upload the file
# (replace UPLOAD_URL_FROM_RESPONSE with the upload_url value from Step 1)
curl -X PUT "UPLOAD_URL_FROM_RESPONSE" \
-H "Content-Type: application/pdf" \
--data-binary @document.pdf
import os

import requests

# Read the key exported in Step 1; KeyError here means the env var is unset.
KNOWHERE_API_KEY = os.environ["KNOWHERE_API_KEY"]

# Step 1: Create job and get upload URL
response = requests.post(
    "https://api.knowhereto.ai/v1/jobs",
    headers={
        "Authorization": f"Bearer {KNOWHERE_API_KEY}",
        "Content-Type": "application/json",
    },
    json={
        "source_type": "file",
        "file_name": "document.pdf",
    },
)
response.raise_for_status()
job = response.json()
upload_url = job["upload_url"]
upload_headers = job.get("upload_headers", {})

# Step 2: Upload the file. Passing the open file object lets requests stream
# it, instead of loading the whole document into memory with f.read().
with open("document.pdf", "rb") as f:
    upload_response = requests.put(
        upload_url,
        headers=upload_headers,
        data=f,
    )

print(f"Job ID: {job['job_id']}")
print(f"Upload status: {upload_response.status_code}")
import fs from 'fs';
// Step 1: Create job and get upload URL
const response = await fetch('https://api.knowhereto.ai/v1/jobs', {
method: 'POST',
headers: {
'Authorization': `Bearer ${KNOWHERE_API_KEY}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
source_type: 'file',
file_name: 'document.pdf'
})
});
const job = await response.json();
const { upload_url, upload_headers } = job;
// Step 2: Upload the file
const fileBuffer = fs.readFileSync('document.pdf');
const uploadResponse = await fetch(upload_url, {
method: 'PUT',
headers: upload_headers,
body: fileBuffer
});
console.log(`Job ID: ${job.job_id}`);
console.log(`Upload status: ${uploadResponse.status}`);
Step 3: Poll for Results
- cURL
- Python
- Node.js
# Fetch the job status; replace JOB_ID with the job_id returned at creation
curl https://api.knowhereto.ai/v1/jobs/JOB_ID \
-H "Authorization: Bearer $KNOWHERE_API_KEY"
import os
import time

import requests

# Read the key exported in Step 1; KeyError here means the env var is unset.
KNOWHERE_API_KEY = os.environ["KNOWHERE_API_KEY"]

job_id = "your_job_id"

# Poll until the job reaches a terminal state ("done" or "failed").
while True:
    response = requests.get(
        f"https://api.knowhereto.ai/v1/jobs/{job_id}",
        headers={"Authorization": f"Bearer {KNOWHERE_API_KEY}"},
    )
    response.raise_for_status()
    job = response.json()
    print(f"Status: {job['status']}")

    if job["status"] == "done":
        print(f"Result URL: {job['result_url']}")
        break
    elif job["status"] == "failed":
        print(f"Error: {job['error']}")
        break

    time.sleep(5)  # Wait 5 seconds before polling again
// Read the key exported in Step 1.
const KNOWHERE_API_KEY = process.env.KNOWHERE_API_KEY;

const jobId = 'your_job_id';

// Poll until the job reaches a terminal state ('done' or 'failed').
async function pollForResult() {
  while (true) {
    const response = await fetch(`https://api.knowhereto.ai/v1/jobs/${jobId}`, {
      headers: { 'Authorization': `Bearer ${KNOWHERE_API_KEY}` }
    });
    const job = await response.json();
    console.log(`Status: ${job.status}`);

    if (job.status === 'done') {
      console.log(`Result URL: ${job.result_url}`);
      return job;
    } else if (job.status === 'failed') {
      console.error(`Error: ${JSON.stringify(job.error)}`);
      throw new Error(job.error.message);
    }

    // Wait 5 seconds before the next poll.
    await new Promise((resolve) => setTimeout(resolve, 5000));
  }
}

await pollForResult();
Step 4: Download and Use Results
Once the job is complete, download the ZIP file from result_url:
- cURL
- Python
- Node.js
# Download the result ZIP
# (replace RESULT_URL_FROM_RESPONSE with the result_url value from the job)
curl -o result.zip "RESULT_URL_FROM_RESPONSE"
# Extract
unzip result.zip -d result/
# View the chunks (jq is optional; it pretty-prints the first chunk)
cat result/chunks.json | jq '.chunks[0]'
import requests
import zipfile
import json
from io import BytesIO

# Download the result archive produced by the completed job
# (`job` is the finished job object from Step 3).
archive_bytes = requests.get(job["result_url"]).content

# Read chunks.json straight out of the in-memory ZIP
with zipfile.ZipFile(BytesIO(archive_bytes)) as zf:
    chunks_data = json.loads(zf.read("chunks.json"))

# Walk every extracted chunk
for chunk in chunks_data["chunks"]:
    print(f"Type: {chunk['type']}")
    print(f"Content: {chunk['content'][:100]}...")
    print("---")
import AdmZip from 'adm-zip';

// Fetch the result archive for the finished job
// (`job` is the job object returned by the polling step).
const zipResponse = await fetch(job.result_url);
const zipBuffer = Buffer.from(await zipResponse.arrayBuffer());

// Parse chunks.json directly out of the in-memory ZIP
const chunksData = JSON.parse(new AdmZip(zipBuffer).readAsText('chunks.json'));

// Walk every extracted chunk
for (const chunk of chunksData.chunks) {
  console.log(`Type: ${chunk.type}`);
  console.log(`Content: ${chunk.content.substring(0, 100)}...`);
  console.log('---');
}
Result Structure
The downloaded ZIP contains:
result.zip
├── manifest.json # Metadata and file index
├── chunks.json # All chunks with content and metadata
├── content.md # Full document as Markdown (optional)
├── images/ # Extracted images
└── tables/ # Extracted tables as HTML
See Result Handling for detailed documentation on the result format.
Next Steps
- Authentication - Learn about API key management
- Core Concepts - Understand how the API works
- API Reference - Complete endpoint documentation
- Error Handling - Handle errors gracefully