Python SDK
The official KnowledgeSDK Python SDK. Provides a clean, synchronous API with automatic retries, rate limit handling, and support for Python 3.8+.
Installation
pip install knowledgesdk
Quick start
from knowledgesdk import KnowledgeSDK
ks = KnowledgeSDK("sk_ks_your_api_key")
# Extract structured knowledge from any website
result = ks.extract.run("https://competitor.com")
print(result["business"]["businessName"])
print(result["knowledgeItems"])
# Search across your extracted knowledge
results = ks.search.run("pricing plans")
for hit in results["hits"]:
    print(f"{hit['title']} (score: {hit['score']})")
Configuration
ks = KnowledgeSDK(
api_key="sk_ks_your_api_key",
base_url="https://api.knowledgesdk.com", # Default
timeout=30000, # Default (30 seconds, in ms)
max_retries=5, # Default
debug=False, # Default
)
| Parameter | Type | Default | Description |
|---|---|---|---|
| api_key | str | Required | Your KnowledgeSDK API key |
| base_url | str | https://api.knowledgesdk.com | Base URL for the API |
| timeout | int | 30000 | Request timeout in milliseconds |
| max_retries | int | 5 | Maximum retries for failed requests |
| debug | bool | False | Enable debug logging |
You can also set the KNOWLEDGESDK_BASE_URL environment variable to override the base URL without passing it explicitly.
Methods
ks.extract.run(url, **kwargs)
Run a synchronous extraction pipeline.
result = ks.extract.run("https://example.com", max_pages=20)
print(result["business"]["businessName"])
print(result["business"]["businessType"])
print(f"Scraped {result['pagesScraped']} pages")
for item in result["knowledgeItems"]:
    print(f"- {item['title']}: {item['content'][:100]}")
ks.extract.run_async(url, **kwargs)
Start an asynchronous extraction and return a job ID.
job = ks.extract.run_async(
"https://example.com",
callback_url="https://myapp.com/webhooks/knowledgesdk",
)
print(job["jobId"]) # "job_abc123"
print(job["status"]) # "PENDING"
ks.extract.run(url) (single page)
Scrape a single URL and return clean markdown.
page = ks.extract.run("https://docs.example.com/getting-started")
print(page["title"])
print(page["markdown"])
print(page["links"])
ks.business.run(url)
Classify a website's business type and competitive positioning.
biz = ks.business.run("https://stripe.com")
print(biz["businessName"]) # "Stripe"
print(biz["businessType"]) # "B2B_SAAS"
print(biz["industrySector"]) # "FINANCE"
print(biz["confidenceScore"]) # 95
ks.screenshot.run(url)
Capture a full-page screenshot as base64 PNG.
import base64
shot = ks.screenshot.run("https://example.com")
image_data = base64.b64decode(shot["screenshot"])
with open("screenshot.png", "wb") as f:
    f.write(image_data)
ks.sitemap.run(url)
Discover all URLs on a website.
sitemap = ks.sitemap.run("https://example.com")
print(f"Found {sitemap['count']} URLs")
for url in sitemap["urls"]:
    print(url)
ks.search.run(query, **kwargs)
Search across indexed knowledge items.
results = ks.search.run("pricing plans", limit=10)
for hit in results["hits"]:
    print(f"{hit['title']} (score: {hit['score']})")
    print(hit["content"][:200])
ks.webhooks.create(**kwargs)
Register a webhook endpoint.
wh = ks.webhooks.create(
url="https://myapp.com/hook",
events=["EXTRACTION_COMPLETED"],
)
print(wh["id"])
ks.webhooks.list()
List all registered webhooks.
webhooks = ks.webhooks.list()
for wh in webhooks:
    print(f"{wh['displayName']}: {wh['status']}")
ks.webhooks.delete(webhook_id)
Delete a webhook by ID.
ks.webhooks.delete("wh_abc123")
ks.jobs.get(job_id)
Get the status of an async job.
job = ks.jobs.get("job_abc123")
print(job["status"]) # "COMPLETED"
ks.jobs.poll(job_id, **kwargs)
Poll a job until completion.
completed = ks.jobs.poll(
"job_abc123",
interval_sec=5,
timeout_sec=300,
)
print(completed["result"])
Error handling
from knowledgesdk import KnowledgeSDK
from knowledgesdk.errors import (
AuthenticationError,
RateLimitError,
APIError,
)
ks = KnowledgeSDK("sk_ks_your_api_key")
try:
    result = ks.extract.run("https://example.com")
except AuthenticationError:
    print("Invalid API key")
except RateLimitError:
    print("Rate limited -- retry after backoff")
except APIError as e:
    print(f"API error: {e}")
Debug mode
Enable debug logging to see all HTTP requests and responses:
ks = KnowledgeSDK("sk_ks_your_api_key", debug=True)
# Or toggle at runtime
ks.set_debug_mode(True)