Skip to content

Commit b384ca9

Browse files
authored
Optional response model for Rank (#4)
Users can now optionally specify a response model for Rank when they want multiple output columns; since it is optional, they can still specify only a column name instead.
1 parent e1e8a48 commit b384ca9

4 files changed

Lines changed: 162 additions & 42 deletions

File tree

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ from everyrow_sdk.ops import rank
5757

5858
result = await rank(
5959
session=session,
60-
task="Rank organizations by their contribution to AI research",
60+
task="Score this organization by their contribution to AI research",
6161
input=dataframe,
6262
field_name="contribution_score",
6363
ascending_order=False,
@@ -164,7 +164,7 @@ from everyrow_sdk.ops import rank_async
164164

165165
task = await rank_async(
166166
session=session,
167-
task="Rank organizations",
167+
task="Score this organization",
168168
input=dataframe,
169169
field_name="score",
170170
)

examples/rank_example.py

Lines changed: 83 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,40 +3,82 @@
33
from textwrap import dedent
44

55
from pandas import DataFrame
6+
from pydantic import BaseModel, Field
67

78
from everyrow_sdk import create_client, create_session
89
from everyrow_sdk.ops import rank
910
from everyrow_sdk.session import Session
1011

1112

13+
class ContributionRanking(BaseModel):
14+
contribution_score: float = Field(
15+
description="Score from 0-100 reflecting contribution"
16+
)
17+
most_significant_contribution: str = Field(
18+
description="Single most significant contribution"
19+
)
20+
21+
1222
async def call_rank(session: Session):
1323
# Rank AI research organizations by their contributions to the field
1424
# This requires researching each org's publications, releases, and impact
1525
ai_research_orgs = DataFrame(
1626
[
17-
{"organization": "OpenAI", "type": "Private lab", "founded": 2015},
27+
{
28+
"organization": "OpenAI",
29+
"type": "Private lab",
30+
"founded": 2015,
31+
},
1832
{
1933
"organization": "Google DeepMind",
2034
"type": "Corporate lab",
2135
"founded": 2010,
2236
},
23-
{"organization": "Anthropic", "type": "Private lab", "founded": 2021},
24-
{"organization": "Meta FAIR", "type": "Corporate lab", "founded": 2013},
37+
{
38+
"organization": "Anthropic",
39+
"type": "Private lab",
40+
"founded": 2021,
41+
},
42+
{
43+
"organization": "Meta FAIR",
44+
"type": "Corporate lab",
45+
"founded": 2013,
46+
},
2547
{
2648
"organization": "Microsoft Research",
2749
"type": "Corporate lab",
2850
"founded": 1991,
2951
},
30-
{"organization": "Stanford HAI", "type": "Academic", "founded": 2019},
31-
{"organization": "MIT CSAIL", "type": "Academic", "founded": 2003},
52+
{
53+
"organization": "Stanford HAI",
54+
"type": "Academic",
55+
"founded": 2019,
56+
},
57+
{
58+
"organization": "MIT CSAIL",
59+
"type": "Academic",
60+
"founded": 2003,
61+
},
3262
{
3363
"organization": "Berkeley AI Research",
3464
"type": "Academic",
3565
"founded": 2010,
3666
},
37-
{"organization": "Mistral AI", "type": "Private lab", "founded": 2023},
38-
{"organization": "xAI", "type": "Private lab", "founded": 2023},
39-
{"organization": "Cohere", "type": "Private lab", "founded": 2019},
67+
{
68+
"organization": "Mistral AI",
69+
"type": "Private lab",
70+
"founded": 2023,
71+
},
72+
{
73+
"organization": "xAI",
74+
"type": "Private lab",
75+
"founded": 2023,
76+
},
77+
{
78+
"organization": "Cohere",
79+
"type": "Private lab",
80+
"founded": 2019,
81+
},
4082
{
4183
"organization": "Allen Institute for AI",
4284
"type": "Non-profit",
@@ -45,22 +87,26 @@ async def call_rank(session: Session):
4587
]
4688
)
4789

48-
result = await rank(
49-
session=session,
50-
task=dedent("""
51-
Rank these AI research organizations by their overall contribution to
52-
advancing large language models and generative AI in the past 2 years.
90+
task = dedent("""
91+
Score the given AI research organization by their overall contribution to
92+
advancing large language models and generative AI in the past 2 years.
93+
94+
Consider factors such as:
95+
- Influential model releases (both open and closed source)
96+
- Important research papers and technical breakthroughs
97+
- Impact on the broader AI ecosystem (open source contributions,
98+
techniques that others have adopted)
99+
- Novel capabilities introduced
53100
54-
Consider factors such as:
55-
- Influential model releases (both open and closed source)
56-
- Important research papers and technical breakthroughs
57-
- Impact on the broader AI ecosystem (open source contributions,
58-
techniques that others have adopted)
59-
- Novel capabilities introduced
101+
Assign a score from 0-100 reflecting their relative contribution,
102+
where 100 represents the most impactful organization.
103+
""")
60104

61-
Assign a score from 0-100 reflecting their relative contribution,
62-
where 100 represents the most impactful organization.
63-
"""),
105+
# Example 1: Basic ranking with a single score field
106+
print("Example 1: Basic ranking")
107+
result = await rank(
108+
session=session,
109+
task=task,
64110
input=ai_research_orgs,
65111
field_name="contribution_score",
66112
ascending_order=False,
@@ -69,6 +115,21 @@ async def call_rank(session: Session):
69115
print(result.data.to_string())
70116
print(f"\nArtifact ID: {result.artifact_id}")
71117

118+
# Example 2: Ranking with a custom response model for additional context
119+
print("\n" + "=" * 80)
120+
print("Example 2: Ranking with detailed response model")
121+
detailed_result = await rank(
122+
session=session,
123+
task=task + "\n\nAlso include their single most significant contribution.",
124+
input=ai_research_orgs,
125+
field_name="contribution_score",
126+
response_model=ContributionRanking,
127+
ascending_order=False,
128+
)
129+
print("Detailed Rankings with Context:")
130+
print(detailed_result.data.to_string())
131+
print(f"\nArtifact ID: {detailed_result.artifact_id}")
132+
72133

73134
async def main():
74135
async with create_client() as client:

src/everyrow_sdk/ops.py

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,9 @@ async def single_agent_async[T: BaseModel](
126126
session_id=session.session_id,
127127
)
128128

129-
cohort_task = EveryrowTask(response_model=response_model, is_map=False, is_expand=return_table)
129+
cohort_task = EveryrowTask(
130+
response_model=response_model, is_map=False, is_expand=return_table
131+
)
130132
await cohort_task.submit(body, session.client)
131133
return cohort_task
132134

@@ -140,7 +142,9 @@ async def agent_map(
140142
response_model: type[BaseModel] = DefaultAgentResponse,
141143
return_table_per_row: bool = False,
142144
) -> TableResult:
143-
cohort_task = await agent_map_async(task, session, input, effort_level, llm, response_model, return_table_per_row)
145+
cohort_task = await agent_map_async(
146+
task, session, input, effort_level, llm, response_model, return_table_per_row
147+
)
144148
result = await cohort_task.await_result(session.client)
145149
if isinstance(result, TableResult):
146150
return result
@@ -240,7 +244,9 @@ async def agent_map_async(
240244
session_id=session.session_id,
241245
)
242246

243-
cohort_task = EveryrowTask(response_model=response_model, is_map=True, is_expand=return_table_per_row)
247+
cohort_task = EveryrowTask(
248+
response_model=response_model, is_map=True, is_expand=return_table_per_row
249+
)
244250
await cohort_task.submit(body, session.client)
245251
return cohort_task
246252

@@ -283,7 +289,9 @@ async def create_scalar_artifact(input: BaseModel, session: Session) -> UUID:
283289

284290

285291
async def create_table_artifact(input: DataFrame, session: Session) -> UUID:
286-
payload = CreateGroupRequest(query=CreateGroupQueryParams(data_to_create=input.to_dict(orient="records")))
292+
payload = CreateGroupRequest(
293+
query=CreateGroupQueryParams(data_to_create=input.to_dict(orient="records"))
294+
)
287295
body = SubmitTaskBody(
288296
payload=payload,
289297
session_id=session.session_id,
@@ -371,12 +379,13 @@ async def merge_async(
371379
return cohort_task
372380

373381

374-
async def rank(
382+
async def rank[T: BaseModel](
375383
task: str,
376384
session: Session,
377385
input: DataFrame | UUID | TableResult,
378386
field_name: str,
379387
field_type: Literal["float", "int", "str", "bool"] = "float",
388+
response_model: type[T] | None = None,
380389
ascending_order: bool = True,
381390
preview: bool = False,
382391
) -> TableResult:
@@ -387,7 +396,8 @@ async def rank(
387396
session: The session to use
388397
input: The input table (DataFrame, UUID, or TableResult)
389398
field_name: The name of the field to extract and sort by
390-
field_type: The type of the field (default: "float")
399+
field_type: The type of the field (default: "float", ignored if response_model is provided)
400+
response_model: Optional Pydantic model for the response schema
391401
ascending_order: If True, sort in ascending order
392402
preview: If True, process only the first few inputs
393403
@@ -400,6 +410,7 @@ async def rank(
400410
input=input,
401411
field_name=field_name,
402412
field_type=field_type,
413+
response_model=response_model,
403414
ascending_order=ascending_order,
404415
preview=preview,
405416
)
@@ -410,26 +421,33 @@ async def rank(
410421
raise EveryrowError("Rank task did not return a table result")
411422

412423

413-
async def rank_async(
424+
async def rank_async[T: BaseModel](
414425
task: str,
415426
session: Session,
416427
input: DataFrame | UUID | TableResult,
417428
field_name: str,
418429
field_type: Literal["float", "int", "str", "bool"] = "float",
430+
response_model: type[T] | None = None,
419431
ascending_order: bool = True,
420432
preview: bool = False,
421-
) -> EveryrowTask[BaseModel]:
433+
) -> EveryrowTask[T]:
422434
"""Submit a rank task asynchronously."""
423435
input_artifact_id = await _process_agent_map_input(input, session)
424436

425-
# Build response schema with single field
426-
response_schema = {
427-
"_model_name": "RankResponse",
428-
field_name: {
429-
"type": field_type,
430-
"optional": False,
431-
},
432-
}
437+
if response_model is not None:
438+
response_schema = _convert_pydantic_to_custom_schema(response_model)
439+
if field_name not in response_schema:
440+
raise ValueError(
441+
f"Field {field_name} not in response model {response_model.__name__}"
442+
)
443+
else:
444+
response_schema = {
445+
"_model_name": "RankResponse",
446+
field_name: {
447+
"type": field_type,
448+
"optional": False,
449+
},
450+
}
433451

434452
query = DeepRankPublicParams(
435453
task=task,
@@ -448,7 +466,11 @@ async def rank_async(
448466
session_id=session.session_id,
449467
)
450468

451-
cohort_task = EveryrowTask(response_model=BaseModel, is_map=True, is_expand=False)
469+
cohort_task: EveryrowTask[T] = EveryrowTask(
470+
response_model=response_model or BaseModel, # type: ignore[arg-type]
471+
is_map=True,
472+
is_expand=False,
473+
)
452474
await cohort_task.submit(body, session.client)
453475
return cohort_task
454476

@@ -625,7 +647,8 @@ async def derive(
625647
input_artifact_id = await _process_agent_map_input(input, session)
626648

627649
derive_expressions = [
628-
DeriveExpression(column_name=col_name, expression=expr) for col_name, expr in expressions.items()
650+
DeriveExpression(column_name=col_name, expression=expr)
651+
for col_name, expr in expressions.items()
629652
]
630653

631654
query = DeriveQueryParams(expressions=derive_expressions)

tests/test_ops.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from everyrow_sdk.ops import (
1717
agent_map,
1818
create_scalar_artifact,
19+
rank_async,
1920
single_agent,
2021
)
2122
from everyrow_sdk.result import ScalarResult, TableResult
@@ -316,3 +317,38 @@ async def test_agent_map_with_table_output(mocker, mock_session):
316317
assert isinstance(result, TableResult)
317318
assert len(result.data) == 2
318319
assert result.artifact_id == artifact_id
320+
321+
322+
@pytest.mark.asyncio
323+
async def test_rank_model_validation(mocker, mock_session) -> None:
324+
input_df = pd.DataFrame(
325+
[
326+
{"country": "China"},
327+
{"country": "India"},
328+
{"country": "Indonesia"},
329+
{"country": "Pakistan"},
330+
{"country": "USA"},
331+
],
332+
)
333+
334+
class ResponseModel(BaseModel):
335+
population_size: int
336+
337+
input_artifact_id = uuid.uuid4()
338+
# Mock create_table_artifact (called because input is DataFrame)
339+
mock_create_table = mocker.patch(
340+
"everyrow_sdk.ops.create_table_artifact", new_callable=AsyncMock
341+
)
342+
mock_create_table.return_value = input_artifact_id
343+
344+
with pytest.raises(
345+
ValueError,
346+
match="Field population not in response model ResponseModel",
347+
):
348+
await rank_async(
349+
task="Find the population of the given country",
350+
session=mock_session,
351+
input=input_df,
352+
field_name="population",
353+
response_model=ResponseModel,
354+
)

0 commit comments

Comments (0)