Cohort SDK Reference

The Cohort API provides programmatic access to patient cohort queries, counts, and demographic breakdowns.

Accessing the API

Access the Cohort API through the `client.alpha.cohort` namespace:

from komodo import Client

client = Client()
cohort_api = client.alpha.cohort

approx_count

Get a fast approximate patient count for a cohort query.

Import

from komodo import Client

Function Signature

def approx_count(
    logic: Dict[str, Any],
    patient_type: str = "adv",
    date_range: Optional[DateRange] = None,
    validate_values: bool = False,
) -> ApproxCountResponse

Parameters

Name	Type	Description
`logic`	`Dict[str, Any]`	The cohort logic tree (e.g., `{"mx_diagnosis_code": "C34%"}`).
`patient_type`	`str`	Patient type to query (default: `"adv"`).
`date_range`	`DateRange \| None`	Optional date range filter.
`validate_values`	`bool`	Whether to validate code values against the database.

Returns

Type	Description
`ApproxCountResponse`	Response containing `total_count` and optional `warnings`.

Example

from komodo import Client

client = Client()

# Simple query with single diagnosis code
response = client.alpha.cohort.approx_count(
    logic={"mx_diagnosis_code": "C34%"},
)
print(f"Approximate count: {response.total_count}")

# Query with multiple codes (OR logic)
response = client.alpha.cohort.approx_count(
    logic={"mx_diagnosis_code": ["C34%", "C50%"]},
)
print(f"Patients with lung or breast cancer: {response.total_count}")

distinct_count

Get an exact distinct patient count for a cohort query. Requires a date range.

Function Signature

def distinct_count(
    logic: Dict[str, Any],
    date_range: DateRange,
    patient_type: str = "adv",
    granularity: Optional[Literal["year", "quarter", "month"]] = None,
    patient_criteria: Optional[Dict[str, Any]] = None,
) -> DistinctCountResponse

Parameters

Name	Type	Description
`logic`	`Dict[str, Any]`	The cohort logic tree.
`date_range`	`DateRange`	Required date range filter.
`patient_type`	`str`	Patient type to query (default: `"adv"`).
`granularity`	`Literal["year", "quarter", "month"] \| None`	Optional time granularity for bucketed counts.
`patient_criteria`	`Dict[str, Any] \| None`	Optional patient-level filter criteria.

Returns

Type	Description
`DistinctCountResponse`	Response containing `total`, optional `counts` (time-bucketed), and `warnings`.

Example

from komodo import Client
from komodo.alpha.models import DateRange

client = Client()

# Get distinct count with date range
response = client.alpha.cohort.distinct_count(
    logic={"mx_diagnosis_code": "C34%"},
    date_range=DateRange(from_date="2023-01-01", to_date="2023-12-31"),
)
print(f"Distinct patients: {response.total}")

# Get counts by year
response = client.alpha.cohort.distinct_count(
    logic={"mx_diagnosis_code": "C34%"},
    date_range=DateRange(from_date="2020-01-01", to_date="2023-12-31"),
    granularity="year",
)
print(f"Total: {response.total}")
for year, count in response.counts.items():
    print(f"  {year}: {count}")

patient_ids

Get the list of patient IDs matching a cohort query.

Function Signature

def patient_ids(
    logic: Dict[str, Any],
    patient_type: str = "adv",
    date_range: Optional[DateRange] = None,
    validate_values: bool = False,
    limit: Optional[int] = None,
) -> PatientIdsResponse

Parameters

Name	Type	Description
`logic`	`Dict[str, Any]`	The cohort logic tree.
`patient_type`	`str`	Patient type to query (default: `"adv"`).
`date_range`	`DateRange \| None`	Optional date range filter.
`validate_values`	`bool`	Whether to validate code values.
`limit`	`int \| None`	Maximum number of patient IDs to return.

Returns

Type	Description
`PatientIdsResponse`	Response containing `total_count`, `patient_ids` list, `returned_count`, and `warnings`.

Example

from komodo import Client

client = Client()

response = client.alpha.cohort.patient_ids(
    logic={"mx_diagnosis_code": "C34%"},
    limit=1000,
)
print(f"Found {response.total_count} patients")
print(f"Returned {len(response.patient_ids)} IDs")

export_patient_ids

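Start an asynchronous export of patient IDs to S3. It takes the same `logic`, `patient_type`, `date_range`, and `validate_values` parameters as `patient_ids` (there is no `limit`). Use the returned `job_id` with `get_export_status` to track the job.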

Function Signature

def export_patient_ids(
    logic: Dict[str, Any],
    patient_type: str = "adv",
    date_range: Optional[DateRange] = None,
    validate_values: bool = False,
) -> ExportStatusResponse

Returns

Type	Description
`ExportStatusResponse`	Response containing `job_id` and initial `status`.

Example

from komodo import Client

client = Client()

# Start export
response = client.alpha.cohort.export_patient_ids(
    logic={"mx_diagnosis_code": "C34%"},
)
print(f"Export job started: {response.job_id}")
print(f"Status: {response.status}")

get_export_status

Get the status of an export job.

Function Signature

def get_export_status(
    job_id: str,
    verbose: bool = False,
) -> ExportStatusResponse

Parameters

Name	Type	Description
`job_id`	`str`	The export job ID.
`verbose`	`bool`	If `True`, include detailed worker timing info.

Returns

Type	Description
`ExportStatusResponse`	Response with the job `status`, `s3_directory` (when complete), and an optional `error` describing a failure.

Example

import time
from komodo import Client
from komodo.alpha.models import ExportStatus

client = Client()

# Start an export and get the job_id
export_response = client.alpha.cohort.export_patient_ids(
    logic={"mx_diagnosis_code": "C34%"},
)
job_id = export_response.job_id

# Poll for completion
while True:
    status = client.alpha.cohort.get_export_status(job_id)
    print(f"Status: {status.status}")

    if status.status == ExportStatus.COMPLETED:
        print(f"Files at: {status.s3_directory}")
        break
    elif status.status == ExportStatus.FAILED:
        print(f"Export failed: {status.error}")
        break

    time.sleep(5)

lookup_codes

Look up available codes for a selector type.

Function Signature

def lookup_codes(
    selector_type: str,
    code: Optional[str] = None,
    pattern: Optional[str] = None,
    include_indices: bool = False,
    limit: Optional[int] = None,
) -> CodesResponse

Parameters

Name	Type	Description
`selector_type`	`str`	The selector type (e.g., `"mx_diagnosis_code"`).
`code`	`str \| None`	Exact code to look up.
`pattern`	`str \| None`	Wildcard pattern with `%` suffix for prefix matching.
`include_indices`	`bool`	If `True`, include index ranges in the response.
`limit`	`int \| None`	Maximum results to return (1-100, default: 100).

Returns

Type	Description
`CodesResponse`	Response containing `codes` (list of `CodeEntry` objects) and `truncated` (bool indicating if more results exist).

Example

from komodo import Client

client = Client()

# Find all lung cancer diagnosis codes
codes = client.alpha.cohort.lookup_codes(
    selector_type="mx_diagnosis_code",
    pattern="C34%",
    limit=20,
)
for entry in codes.codes:
    print(entry.code)

list_selectors

List available selector types.

Function Signature

def list_selectors() -> SelectorsResponse

Returns

Type	Description
`SelectorsResponse`	Response containing `selectors` (list of `SelectorInfo` objects with `name` and `description` fields).

Example

from komodo import Client

client = Client()

selectors = client.alpha.cohort.list_selectors()
for s in selectors.selectors:
    print(f"{s.name}: {s.description}")

get_total_count

Convenience method that returns a patient count using either approximate or distinct counting, depending on `count_type`. A `date_range` is required for distinct counts.

Function Signature

def get_total_count(
    logic: Dict[str, Any],
    count_type: CountType,
    date_range: Optional[DateRange] = None,
    patient_type: str = "adv",
) -> Tuple[int, Union[ApproxCountResponse, DistinctCountResponse]]

Parameters

Name	Type	Description
`logic`	`Dict[str, Any]`	The cohort logic tree.
`count_type`	`CountType`	Whether to use approximate or distinct counting.
`date_range`	`DateRange \| None`	Date range filter (required for distinct counts).
`patient_type`	`str`	Patient type to query (default: `"adv"`).

Returns

Type	Description
`Tuple[int, ApproxCountResponse \| DistinctCountResponse]`	Tuple of (count, response) where response contains additional info.

Example

from komodo import Client
from komodo.alpha.models import CountType, DateRange

client = Client()

# Get approximate count
count, response = client.alpha.cohort.get_total_count(
    logic={"mx_diagnosis_code": "C34%"},
    count_type=CountType.APPROXIMATE,
)
print(f"Approximate count: {count}")

# Get distinct count (requires date range)
count, response = client.alpha.cohort.get_total_count(
    logic={"mx_diagnosis_code": "C34%"},
    count_type=CountType.DISTINCT,
    date_range=DateRange(from_date="2023-01-01", to_date="2023-12-31"),
)
print(f"Distinct count: {count}")

health

Check the Cohort API orchestrator health status.

Function Signature

def health(
    deep: bool = False,
) -> HealthResponse

Parameters

Name	Type	Description
`deep`	`bool`	If `True`, return detailed health information about workers, Redis, and indexers.

Returns

Type	Description
`HealthResponse`	Response containing `status` and optional detailed health info (`workers`, `redis`, `indexer`).

Example

from komodo import Client

client = Client()

# Basic health check
health = client.alpha.cohort.health()
print(f"Status: {health.status}")

# Detailed health check
health = client.alpha.cohort.health(deep=True)
print(f"Status: {health.status}")
if health.workers:
    print(f"Workers: {health.workers.connected}/{health.workers.total}")

get_stats

Get aggregated request metrics for Cohort API endpoints.

Function Signature

def get_stats(
    time_range: Optional[Literal["1h", "6h", "24h"]] = None,
    interval: Optional[Literal["1m", "5m", "10m", "1h"]] = None,
) -> StatsResponse

Parameters

Name	Type	Description
`time_range`	`Literal["1h", "6h", "24h"] \| None`	Time range for metrics (default: `"1h"`).
`interval`	`Literal["1m", "5m", "10m", "1h"] \| None`	Bucket interval for time-series data (default: `"1m"`).

Returns

Type	Description
`StatsResponse`	Response containing `current_minute`, `last_hour`, optional `buckets`, and `response_cache` stats.

Example

from komodo import Client

client = Client()

# Get last hour metrics
stats = client.alpha.cohort.get_stats(time_range="1h")
print(f"Current minute: {stats.current_minute}")
print(f"Last hour: {stats.last_hour}")

# Get 24h metrics with hourly buckets
stats = client.alpha.cohort.get_stats(time_range="24h", interval="1h")
if stats.buckets:
    for bucket in stats.buckets:
        print(bucket)

list_audit_logs

Query audit logs for Cohort API requests.

Function Signature

def list_audit_logs(
    from_time: Optional[str] = None,
    to_time: Optional[str] = None,
    limit: Optional[int] = None,
) -> List[AuditEntry]

Parameters

Name	Type	Description
`from_time`	`str \| None`	Filter entries after this time (RFC3339 format, e.g., `"2023-01-01T00:00:00Z"`).
`to_time`	`str \| None`	Filter entries before this time (RFC3339 format).
`limit`	`int \| None`	Maximum entries to return (default: 100, max: 1000).

Returns

Type	Description
`List[AuditEntry]`	List of audit entries sorted by timestamp descending (newest first). Each entry contains `endpoint`, `response_code`, `latency_ms`, `timestamp`, and optional fields such as `request_id` and `error`.

Example

from komodo import Client

client = Client()

# Get recent audit logs
logs = client.alpha.cohort.list_audit_logs(limit=50)
for entry in logs:
    print(f"{entry.timestamp}: {entry.endpoint} - {entry.response_code} ({entry.latency_ms}ms)")

# Get logs for a specific time range
logs = client.alpha.cohort.list_audit_logs(
    from_time="2023-06-01T00:00:00Z",
    to_time="2023-06-30T23:59:59Z",
    limit=100,
)

Building Complex Queries

Logic Tree Format

The `logic` parameter accepts a dictionary representing the query logic:

# Single condition
{"mx_diagnosis_code": "C34%"}

# Multiple values (OR logic)
{"mx_diagnosis_code": ["C34%", "C50%"]}

# AND logic
{
    "op": "and",
    "conditions": [
        {"mx_diagnosis_code": "C34%"},
        {"cpt_hcpcs_code": "96413"},
    ]
}

# OR logic
{
    "op": "or",
    "conditions": [
        {"mx_diagnosis_code": "C34%"},
        {"mx_diagnosis_code": "C50%"},
    ]
}

# Nested logic
{
    "op": "and",
    "conditions": [
        {"mx_diagnosis_code": "C34%"},
        {
            "op": "or",
            "conditions": [
                {"cpt_hcpcs_code": "96413"},
                {"cpt_hcpcs_code": "96415"},
            ]
        }
    ]
}

Using Condition and ConditionGroup Classes

For type-safe query building, use the model classes:

from komodo.alpha.models import Condition, ConditionGroup, SelectorType

# Create conditions
diagnosis = Condition(
    selector_type=SelectorType.MX_DIAGNOSIS_CODE,
    values=["C34%", "C50%"]
)

procedure = Condition(
    selector_type=SelectorType.CPT_HCPCS_CODE,
    values=["96413"]
)

# Combine with AND logic
query = ConditionGroup(
    operator="and",
    items=[diagnosis, procedure]
)

# Convert to API format
logic = query.to_logic()

Models Reference

DateRange

from datetime import date
from komodo.alpha.models import DateRange

# From date objects
date_range = DateRange(
    from_date=date(2023, 1, 1),
    to_date=date(2023, 12, 31)
)

# From strings (ISO format)
date_range = DateRange(
    from_date="2023-01-01",
    to_date="2023-12-31"
)

# From strings (mm/dd/yy format)
date_range = DateRange.from_strings("01/01/23", "12/31/23")

SelectorType

Available selector types:

Value	Description	Examples
`mx_diagnosis_code`	ICD-10 diagnosis codes	`C34%`, `C50.1`, `Z51.11`
`cpt_hcpcs_code`	CPT/HCPCS procedure codes	`96413`, `99213`, `J9271`
`ndc`	National Drug Codes	`12345678901`
`drg_code`	DRG codes	`470`, `871`
`revenue_code`	Revenue codes	`0636`, `0250`

CountType

from komodo.alpha.models import CountType

CountType.APPROXIMATE  # Fast approximate count
CountType.DISTINCT     # Exact distinct count (requires date range)

Response Models

Model	Fields
`ApproxCountResponse`	`total_count`, `warnings`
`DistinctCountResponse`	`total`, `counts`, `warnings`
`PatientIdsResponse`	`total_count`, `patient_ids`, `returned_count`, `warnings`
`ExportStatusResponse`	`job_id`, `status`, `s3_directory`, `error`, `total_patient_count`
`CodesResponse`	`codes`, `truncated`
`SelectorsResponse`	`selectors`
`HealthResponse`	`status`, `workers`, `redis`, `indexer`
`StatsResponse`	`current_minute`, `last_hour`, `buckets`, `response_cache`
`AuditEntry`	`endpoint`, `response_code`, `latency_ms`, `timestamp`, `request_id`, `error`

Async Methods

API methods have async variants named with the `_async` suffix; the available variants are listed after the example below:

import asyncio
from komodo import Client

async def get_count():
    client = Client()
    response = await client.alpha.cohort.approx_count_async(
        logic={"mx_diagnosis_code": "C34%"},
    )
    return response.total_count

count = asyncio.run(get_count())

Available async methods:

approx_count_async
distinct_count_async
patient_ids_async
export_patient_ids_async
get_export_status_async
lookup_codes_async
list_selectors_async
health_async
get_stats_async
list_audit_logs_async