Python SDK
docs/SDKs/Python SDK

Python SDK

Official Python SDK.

Python SDK

The official KnowledgeSDK Python SDK. Provides a clean, synchronous API with automatic retries, rate-limit handling, and support for Python 3.8+.

Installation

terminal>_bash
pip install knowledgesdk

Quick start

python snippetPYpython
from knowledgesdk import KnowledgeSDK

ks = KnowledgeSDK("sk_ks_your_api_key")

# Extract structured knowledge from any website
result = ks.extract.run("https://competitor.com")
print(result["business"]["businessName"])
print(result["knowledgeItems"])

# Search across your extracted knowledge
results = ks.search.run("pricing plans")
for hit in results["hits"]:
    print(f"{hit['title']} (score: {hit['score']})")

Configuration

python snippetPYpython
ks = KnowledgeSDK(
    api_key="sk_ks_your_api_key",
    base_url="https://api.knowledgesdk.com",  # Default
    timeout=30000,                             # Default (30 seconds, in ms)
    max_retries=5,                             # Default
    debug=False,                               # Default
)

| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| api_key | str | Required | Your KnowledgeSDK API key |
| base_url | str | https://api.knowledgesdk.com | Base URL for the API |
| timeout | int | 30000 | Request timeout in milliseconds |
| max_retries | int | 5 | Maximum retries for failed requests |
| debug | bool | False | Enable debug logging |

You can also set the KNOWLEDGESDK_BASE_URL environment variable to override the base URL without passing it explicitly.

Methods

ks.extract.run(url, **kwargs)

Run a synchronous extraction pipeline.

python snippetPYpython
result = ks.extract.run("https://example.com", max_pages=20)

print(result["business"]["businessName"])
print(result["business"]["businessType"])
print(f"Scraped {result['pagesScraped']} pages")

for item in result["knowledgeItems"]:
    print(f"- {item['title']}: {item['content'][:100]}")

ks.extract.run_async(url, **kwargs)

Start an asynchronous extraction and return a job ID.

python snippetPYpython
job = ks.extract.run_async(
    "https://example.com",
    callback_url="https://myapp.com/webhooks/knowledgesdk",
)

print(job["jobId"])   # "job_abc123"
print(job["status"])  # "PENDING"

ks.extract.run(url) (single page)

Scrape a single URL and return clean markdown.

python snippetPYpython
page = ks.extract.run("https://docs.example.com/getting-started")

print(page["title"])
print(page["markdown"])
print(page["links"])

ks.business.run(url)

Classify a website's business type and competitive positioning.

python snippetPYpython
biz = ks.business.run("https://stripe.com")

print(biz["businessName"])      # "Stripe"
print(biz["businessType"])      # "B2B_SAAS"
print(biz["industrySector"])    # "FINANCE"
print(biz["confidenceScore"])   # 95

ks.screenshot.run(url)

Capture a full-page screenshot as a base64-encoded PNG.

python snippetPYpython
import base64

shot = ks.screenshot.run("https://example.com")

image_data = base64.b64decode(shot["screenshot"])
with open("screenshot.png", "wb") as f:
    f.write(image_data)

ks.sitemap.run(url)

Discover all URLs on a website.

python snippetPYpython
sitemap = ks.sitemap.run("https://example.com")

print(f"Found {sitemap['count']} URLs")
for url in sitemap["urls"]:
    print(url)

ks.search.run(query, **kwargs)

Search across indexed knowledge items.

python snippetPYpython
results = ks.search.run("pricing plans", limit=10)

for hit in results["hits"]:
    print(f"{hit['title']} (score: {hit['score']})")
    print(hit["content"][:200])

ks.webhooks.create(**kwargs)

Register a webhook endpoint.

python snippetPYpython
wh = ks.webhooks.create(
    url="https://myapp.com/hook",
    events=["EXTRACTION_COMPLETED"],
)
print(wh["id"])

ks.webhooks.list()

List all registered webhooks.

python snippetPYpython
webhooks = ks.webhooks.list()
for wh in webhooks:
    print(f"{wh['displayName']}: {wh['status']}")

ks.webhooks.delete(webhook_id)

Delete a webhook by ID.

python snippetPYpython
ks.webhooks.delete("wh_abc123")

ks.jobs.get(job_id)

Get the status of an async job.

python snippetPYpython
job = ks.jobs.get("job_abc123")
print(job["status"])  # "COMPLETED"

ks.jobs.poll(job_id, **kwargs)

Poll a job until completion.

python snippetPYpython
completed = ks.jobs.poll(
    "job_abc123",
    interval_sec=5,
    timeout_sec=300,
)
print(completed["result"])

Error handling

python snippetPYpython
from knowledgesdk import KnowledgeSDK
from knowledgesdk.errors import (
    AuthenticationError,
    RateLimitError,
    APIError,
)

ks = KnowledgeSDK("sk_ks_your_api_key")

try:
    result = ks.extract.run("https://example.com")
except AuthenticationError:
    print("Invalid API key")
except RateLimitError:
    print("Rate limited -- retry after backoff")
except APIError as e:
    print(f"API error: {e}")

Debug mode

Enable debug logging to see all HTTP requests and responses:

python snippetPYpython
ks = KnowledgeSDK("sk_ks_your_api_key", debug=True)

# Or toggle at runtime
ks.set_debug_mode(True)