Quick Start
Parse your first document with Knowhere API in under 5 minutes.
Prerequisites
- A Knowhere API key (get one from your Knowhere account)
- A document to parse (PDF, DOCX, XLSX, or PPTX)
Step 1: Set Your API Key
# Export the key so the examples below can read it from the environment
export KNOWHERE_API_KEY="your_api_key_here"
Step 2: Create a Parsing Job
You have two options for submitting documents:
Option A: Parse from URL
If your document is publicly accessible:
- cURL
- Python
- Node.js
# Create a parsing job that points at a publicly accessible document URL
curl -X POST https://api.knowhereto.ai/v1/jobs \
-H "Authorization: Bearer $KNOWHERE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"source_type": "url",
"source_url": "https://example.com/document.pdf"
}'
import os

import requests

# Read the key exported in Step 1; KeyError here means the env var is unset.
KNOWHERE_API_KEY = os.environ["KNOWHERE_API_KEY"]

# Create a parsing job that points at a publicly accessible document URL.
response = requests.post(
    "https://api.knowhereto.ai/v1/jobs",
    headers={
        "Authorization": f"Bearer {KNOWHERE_API_KEY}",
        "Content-Type": "application/json",
    },
    json={
        "source_type": "url",
        "source_url": "https://example.com/document.pdf",
    },
)
response.raise_for_status()  # surface HTTP errors instead of parsing an error body

job = response.json()
print(f"Job ID: {job['job_id']}")
// Read the key exported in Step 1.
const KNOWHERE_API_KEY = process.env.KNOWHERE_API_KEY;

// Create a parsing job that points at a publicly accessible document URL.
const response = await fetch('https://api.knowhereto.ai/v1/jobs', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${KNOWHERE_API_KEY}`,
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    source_type: 'url',
    source_url: 'https://example.com/document.pdf'
  })
});
if (!response.ok) {
  // Fail fast on HTTP errors instead of reading job_id from an error body.
  throw new Error(`Job creation failed: ${response.status}`);
}
const job = await response.json();
console.log(`Job ID: ${job.job_id}`);
Option B: Upload a Local File
For local files, first create a job to get an upload URL:
- cURL
- Python
- Node.js
# Step 1: Create job and get upload URL
curl -X POST https://api.knowhereto.ai/v1/jobs \
-H "Authorization: Bearer $KNOWHERE_API_KEY" \
-H "Content-Type: application/json" \
-d '{
"source_type": "file",
"file_name": "document.pdf"
}'
# Response includes upload_url and upload_headers
# Step 2: Upload the file
# (replace UPLOAD_URL_FROM_RESPONSE with the upload_url value from Step 1)
curl -X PUT "UPLOAD_URL_FROM_RESPONSE" \
-H "Content-Type: application/pdf" \
--data-binary @document.pdf
import os

import requests

# Read the key exported in Step 1; KeyError here means the env var is unset.
KNOWHERE_API_KEY = os.environ["KNOWHERE_API_KEY"]

# Step 1: Create job and get upload URL
response = requests.post(
    "https://api.knowhereto.ai/v1/jobs",
    headers={
        "Authorization": f"Bearer {KNOWHERE_API_KEY}",
        "Content-Type": "application/json",
    },
    json={
        "source_type": "file",
        "file_name": "document.pdf",
    },
)
response.raise_for_status()
job = response.json()
upload_url = job["upload_url"]
upload_headers = job.get("upload_headers", {})

# Step 2: Upload the file. Passing the open file object lets requests stream
# it, instead of loading the whole document into memory with f.read().
with open("document.pdf", "rb") as f:
    upload_response = requests.put(
        upload_url,
        headers=upload_headers,
        data=f,
    )

print(f"Job ID: {job['job_id']}")
print(f"Upload status: {upload_response.status_code}")
import fs from 'fs';
// Step 1: Create job and get upload URL
const response = await fetch('https://api.knowhereto.ai/v1/jobs', {
method: 'POST',
headers: {
'Authorization': `Bearer ${KNOWHERE_API_KEY}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
source_type: 'file',
file_name: 'document.pdf'
})
});
const job = await response.json();
const { upload_url, upload_headers } = job;
// Step 2: Upload the file
const fileBuffer = fs.readFileSync('document.pdf');
const uploadResponse = await fetch(upload_url, {
method: 'PUT',
headers: upload_headers,
body: fileBuffer
});
console.log(`Job ID: ${job.job_id}`);
console.log(`Upload status: ${uploadResponse.status}`);
Step 3: Poll for Results
- cURL
- Python
- Node.js
# Fetch the job status; replace JOB_ID with the job_id returned at creation
curl https://api.knowhereto.ai/v1/jobs/JOB_ID \
-H "Authorization: Bearer $KNOWHERE_API_KEY"
import os
import time

import requests

# Read the key exported in Step 1; KeyError here means the env var is unset.
KNOWHERE_API_KEY = os.environ["KNOWHERE_API_KEY"]

job_id = "your_job_id"

# Poll until the job reaches a terminal state ("done" or "failed").
while True:
    response = requests.get(
        f"https://api.knowhereto.ai/v1/jobs/{job_id}",
        headers={"Authorization": f"Bearer {KNOWHERE_API_KEY}"},
    )
    response.raise_for_status()
    job = response.json()
    print(f"Status: {job['status']}")

    if job["status"] == "done":
        print(f"Result URL: {job['result_url']}")
        break
    elif job["status"] == "failed":
        print(f"Error: {job['error']}")
        break

    time.sleep(5)  # Wait 5 seconds before polling again
// Read the key exported in Step 1.
const KNOWHERE_API_KEY = process.env.KNOWHERE_API_KEY;

const jobId = 'your_job_id';

// Poll until the job reaches a terminal state ('done' or 'failed').
async function pollForResult() {
  while (true) {
    const response = await fetch(`https://api.knowhereto.ai/v1/jobs/${jobId}`, {
      headers: { 'Authorization': `Bearer ${KNOWHERE_API_KEY}` }
    });
    const job = await response.json();
    console.log(`Status: ${job.status}`);

    if (job.status === 'done') {
      console.log(`Result URL: ${job.result_url}`);
      return job;
    } else if (job.status === 'failed') {
      console.error(`Error: ${JSON.stringify(job.error)}`);
      throw new Error(job.error.message);
    }

    // Wait 5 seconds before the next poll.
    await new Promise((resolve) => setTimeout(resolve, 5000));
  }
}

await pollForResult();
Step 4: Download and Use Results
Once the job is complete, download the ZIP file from result_url:
- cURL
- Python
- Node.js
# Download the result ZIP
# (replace RESULT_URL_FROM_RESPONSE with the result_url value from the job)
curl -o result.zip "RESULT_URL_FROM_RESPONSE"
# Extract
unzip result.zip -d result/
# View the chunks (jq is optional; it pretty-prints the first chunk)
cat result/chunks.json | jq '.chunks[0]'
import requests
import zipfile
import json
from io import BytesIO

# Download the result archive produced by the completed job
# (`job` is the finished job object from Step 3).
archive_bytes = requests.get(job["result_url"]).content

# Read chunks.json straight out of the in-memory ZIP
with zipfile.ZipFile(BytesIO(archive_bytes)) as zf:
    chunks_data = json.loads(zf.read("chunks.json"))

# Walk every extracted chunk
for chunk in chunks_data["chunks"]:
    print(f"Type: {chunk['type']}")
    print(f"Content: {chunk['content'][:100]}...")
    print("---")
import AdmZip from 'adm-zip';

// Fetch the result archive for the finished job
// (`job` is the job object returned by the polling step).
const zipResponse = await fetch(job.result_url);
const zipBuffer = Buffer.from(await zipResponse.arrayBuffer());

// Parse chunks.json directly out of the in-memory ZIP
const chunksData = JSON.parse(new AdmZip(zipBuffer).readAsText('chunks.json'));

// Walk every extracted chunk
for (const chunk of chunksData.chunks) {
  console.log(`Type: ${chunk.type}`);
  console.log(`Content: ${chunk.content.substring(0, 100)}...`);
  console.log('---');
}
Result Structure
The downloaded ZIP contains:
result.zip
├── manifest.json # Metadata and file index
├── chunks.json # All chunks with content and metadata
├── content.md # Full document as Markdown (optional)
├── images/ # Extracted images
└── tables/ # Extracted tables as HTML
See Result Handling for detailed documentation on the result format.
Next Steps
- Authentication - Learn about API key management
- Core Concepts - Understand how the API works
- API Reference - Complete endpoint documentation
- Error Handling - Handle errors gracefully