
Commit 6ee90bd

nefertitirogers (Nefertiti Rogers) authored

Additional test coverage (#318)

* init commit
* merge
* update
* async tests
* more updates to async
* lint fixes
* update mocks and lint fixes
* lint
* lint
* lint
* lint

Co-authored-by: Nefertiti Rogers <nefertitirogers@Nefertitis-MacBook-Pro.local>
Co-authored-by: Nefertiti Rogers <nefertitirogers@Nefertitis-MBP.localdomain>
1 parent d124f94 commit 6ee90bd

File tree

10 files changed: 332 additions, 1 deletion

tests/integration_tests/mock_llm_outputs.py

Lines changed: 1 addition & 0 deletions

@@ -28,6 +28,7 @@ def _invoke_llm(self, prompt, *args, **kwargs):
             pydantic.COMPILED_PROMPT_REASK_2: pydantic.LLM_OUTPUT_REASK_2,
             string.COMPILED_PROMPT: string.LLM_OUTPUT,
             string.COMPILED_PROMPT_REASK: string.LLM_OUTPUT_REASK,
+            string.COMPILED_LIST_PROMPT: string.LIST_LLM_OUTPUT,
             python_rail.VALIDATOR_PARALLELISM_PROMPT_1: python_rail.VALIDATOR_PARALLELISM_RESPONSE_1,  # noqa: E501
             python_rail.VALIDATOR_PARALLELISM_PROMPT_2: python_rail.VALIDATOR_PARALLELISM_RESPONSE_2,  # noqa: E501
             python_rail.VALIDATOR_PARALLELISM_PROMPT_3: python_rail.VALIDATOR_PARALLELISM_RESPONSE_3,  # noqa: E501
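
The mock resolves each compiled prompt to a canned completion by dictionary lookup, so registering the new `string.COMPILED_LIST_PROMPT: string.LIST_LLM_OUTPUT` pair is all the list test needs. A minimal sketch of that lookup pattern (illustrative only, not the repository's exact mock class):

class MockCompletionLLM:
    """Illustrative stand-in for the suite's mock LLM callable."""

    def __init__(self, responses):
        # responses: dict mapping compiled prompt text -> canned LLM output
        self.responses = responses

    def _invoke_llm(self, prompt, *args, **kwargs):
        # Return the canned output registered for this exact compiled prompt.
        try:
            return self.responses[prompt]
        except KeyError:
            raise ValueError("Compiled prompt not found in mock LLM outputs") from None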

tests/integration_tests/test_assets/string/__init__.py

Lines changed: 5 additions & 0 deletions

@@ -11,8 +11,11 @@
 
 COMPILED_INSTRUCTIONS = reader("compiled_instructions.txt")
 COMPILED_PROMPT = reader("compiled_prompt.txt")
+COMPILED_LIST_PROMPT = reader("compiled_list_prompt.txt")
 LLM_OUTPUT = reader("llm_output.txt")
+LIST_LLM_OUTPUT = reader("llm_list_output.txt")
 RAIL_SPEC_FOR_STRING = reader("string.rail")
+RAIL_SPEC_FOR_LIST = reader("list.rail")
 
 COMPILED_PROMPT_REASK = reader("compiled_prompt_reask.txt")
 RAIL_SPEC_FOR_STRING_REASK = reader("string_reask.rail")

@@ -30,6 +33,8 @@
 
 __all__ = [
     "COMPILED_PROMPT",
+    "RAIL_SPEC_FOR_LIST",
+    "LIST_LLM_OUTPUT",
     "LLM_OUTPUT",
     "RAIL_SPEC_FOR_STRING",
     "COMPILED_PROMPT_REASK",

tests/integration_tests/test_assets/string/compiled_list_prompt.txt

Lines changed: 24 additions & 0 deletions

@@ -0,0 +1,24 @@
+
+Generate a dataset of fake user orders. Each row of the dataset should be valid.
+
+
+Given below is XML that describes the information to extract from this document and the tags to extract it into.
+
+<output>
+    <list name="user_orders" description="Generate a list of user, and how many orders they have placed in the past." format="length: min=10 max=10">
+        <object>
+            <string name="user_id" description="The user's id." format="1-indexed"/>
+            <string name="user_name" description="The user's first name and last name" format="two-words"/>
+            <integer name="num_orders" description="The number of orders the user has placed" format="valid-range: min=0 max=50"/>
+            <date name="last_order_date" description="Date of last order"/>
+        </object>
+    </list>
+</output>
+
+
+ONLY return a valid JSON object (no other text is necessary), where the key of the field in JSON is the `name` attribute of the corresponding XML, and the value is of the type specified by the corresponding XML's tag. The JSON MUST conform to the XML format, including any types and format requests e.g. requests for lists, objects and specific types. Be correct and concise. If you are unsure anywhere, enter `null`.
+
+Here are examples of simple (XML, JSON) pairs that show the expected behavior:
+- `<string name='foo' format='two-words lower-case' />` => `{'foo': 'example one'}`
+- `<list name='bar'><string format='upper-case' /></list>` => `{"bar": ['STRING ONE', 'STRING TWO', etc.]}`
+- `<object name='baz'><string name="foo" format="capitalize two-words" /><integer name="index" format="1-indexed" /></object>` => `{'baz': {'foo': 'Some String', 'index': 1}}`

tests/integration_tests/test_assets/string/list.rail

Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+<rail version="0.1">
+<output>
+    <list name="user_orders" description="Generate a list of user, and how many orders they have placed in the past." format="length: 10 10" on-fail-length="noop">
+        <object>
+            <string name="user_id" description="The user's id." format="1-indexed" />
+            <string name="user_name" description="The user's first name and last name" format="two-words" />
+            <integer name="num_orders" description="The number of orders the user has placed" format="valid-range: 0 50" />
+            <date name="last_order_date" description="Date of last order" />
+        </object>
+    </list>
+</output>
+
+<prompt>
+Generate a dataset of fake user orders. Each row of the dataset should be valid.
+
+${gr.complete_json_suffix}
+</prompt>
+</rail>
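
A quick sketch of how this spec relates to the prompt asset above; `Guard.from_rail` and `guard.prompt.format().source` are used the same way in the unit test added below, and the inspection here is only illustrative:

import guardrails as gd

# Load the new list spec by path (as laid out in this commit) and look at its
# prompt. The ${gr.complete_json_suffix} placeholder is what produces the
# JSON-output instructions captured in compiled_list_prompt.txt above.
guard = gd.Guard.from_rail("tests/integration_tests/test_assets/string/list.rail")
print(guard.prompt.format().source)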

tests/integration_tests/test_assets/string/llm_list_output.txt

Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+{
+    'user_orders': [
+        {'user_id': 1, 'user_name': 'John Smith', 'num_orders': 10, 'last_order_date': '2020-01-01'},
+        {'user_id': 2, 'user_name': 'Jane Doe', 'num_orders': 20, 'last_order_date': '2020-02-01'},
+        {'user_id': 3, 'user_name': 'Bob Jones', 'num_orders': 30, 'last_order_date': '2020-03-01'},
+        {'user_id': 4, 'user_name': 'Alice Smith', 'num_orders': 40, 'last_order_date': '2020-04-01'},
+        {'user_id': 5, 'user_name': 'John Doe', 'num_orders': 50, 'last_order_date': '2020-05-01'},
+        {'user_id': 6, 'user_name': 'Jane Jones', 'num_orders': 0, 'last_order_date': '2020-06-01'},
+        {'user_id': 7, 'user_name': 'Bob Smith', 'num_orders': 10, 'last_order_date': '2020-07-01'},
+        {'user_id': 8, 'user_name': 'Alice Doe', 'num_orders': 20, 'last_order_date': '2020-08-01'},
+        {'user_id': 9, 'user_name': 'John Jones', 'num_orders': 30, 'last_order_date': '2020-09-01'},
+        {'user_id': 10, 'user_name': 'Jane Smith', 'num_orders': 40, 'last_order_date': '2020-10-01'}
+    ]
+}
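
Note that this canned output uses Python-style single quotes, so it is a Python literal rather than strict JSON; a standalone illustration of the difference (not part of the test suite):

import ast
import json

canned = "{'user_orders': [{'user_id': 1, 'user_name': 'John Smith'}]}"  # abbreviated

print(ast.literal_eval(canned))  # parses as a Python literal
try:
    json.loads(canned)  # strict JSON requires double-quoted keys and strings
except json.JSONDecodeError as err:
    print("not strict JSON:", err)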

tests/integration_tests/test_async.py

Lines changed: 151 additions & 0 deletions

@@ -63,6 +63,157 @@ async def test_entity_extraction_with_reask(mocker, multiprocessing_validators:
     )
 
 
+@pytest.mark.asyncio
+async def test_entity_extraction_with_noop(mocker):
+    mocker.patch(
+        "guardrails.llm_providers.AsyncOpenAICallable",
+        new=MockAsyncOpenAICallable,
+    )
+    content = gd.docs_utils.read_pdf("docs/examples/data/chase_card_agreement.pdf")
+    guard = gd.Guard.from_rail_string(entity_extraction.RAIL_SPEC_WITH_NOOP)
+    _, final_output = await guard(
+        llm_api=openai.Completion.acreate,
+        prompt_params={"document": content[:6000]},
+        num_reasks=1,
+    )
+
+    # Assertions are made on the guard state object.
+    assert final_output == entity_extraction.VALIDATED_OUTPUT_NOOP
+
+    guard_history = guard.guard_state.most_recent_call.history
+
+    # Check that the guard state object has the correct number of re-asks.
+    assert len(guard_history) == 1
+
+    # For original prompt and output
+    assert guard_history[0].prompt == gd.Prompt(entity_extraction.COMPILED_PROMPT)
+    assert guard_history[0].output == entity_extraction.LLM_OUTPUT
+    assert guard_history[0].validated_output == entity_extraction.VALIDATED_OUTPUT_NOOP
+
+
+@pytest.mark.asyncio
+async def test_entity_extraction_with_noop_pydantic(mocker):
+    mocker.patch(
+        "guardrails.llm_providers.AsyncOpenAICallable",
+        new=MockAsyncOpenAICallable,
+    )
+    content = gd.docs_utils.read_pdf("docs/examples/data/chase_card_agreement.pdf")
+    guard = gd.Guard.from_pydantic(
+        entity_extraction.PYDANTIC_RAIL_WITH_NOOP, entity_extraction.PYDANTIC_PROMPT
+    )
+    _, final_output = await guard(
+        llm_api=openai.Completion.acreate,
+        prompt_params={"document": content[:6000]},
+        num_reasks=1,
+    )
+
+    # Assertions are made on the guard state object.
+    assert final_output == entity_extraction.VALIDATED_OUTPUT_NOOP
+
+    guard_history = guard.guard_state.most_recent_call.history
+
+    # Check that the guard state object has the correct number of re-asks.
+    assert len(guard_history) == 1
+
+    # For original prompt and output
+    assert guard_history[0].prompt == gd.Prompt(entity_extraction.COMPILED_PROMPT)
+    assert guard_history[0].output == entity_extraction.LLM_OUTPUT
+    assert guard_history[0].validated_output == entity_extraction.VALIDATED_OUTPUT_NOOP
+
+
+@pytest.mark.asyncio
+async def test_entity_extraction_with_filter(mocker):
+    """Test entity extraction with the filter on-fail policy."""
+    mocker.patch(
+        "guardrails.llm_providers.AsyncOpenAICallable",
+        new=MockAsyncOpenAICallable,
+    )
+
+    content = gd.docs_utils.read_pdf("docs/examples/data/chase_card_agreement.pdf")
+    guard = gd.Guard.from_rail_string(entity_extraction.RAIL_SPEC_WITH_FILTER)
+    _, final_output = await guard(
+        llm_api=openai.Completion.acreate,
+        prompt_params={"document": content[:6000]},
+        num_reasks=1,
+    )
+
+    # Assertions are made on the guard state object.
+    assert final_output == entity_extraction.VALIDATED_OUTPUT_FILTER
+
+    guard_history = guard.guard_state.most_recent_call.history
+
+    # Check that the guard state object has the correct number of re-asks.
+    assert len(guard_history) == 1
+
+    # For original prompt and output
+    assert guard_history[0].prompt == gd.Prompt(entity_extraction.COMPILED_PROMPT)
+    assert guard_history[0].output == entity_extraction.LLM_OUTPUT
+    assert (
+        guard_history[0].validated_output == entity_extraction.VALIDATED_OUTPUT_FILTER
+    )
+
+
+@pytest.mark.asyncio
+async def test_entity_extraction_with_fix(mocker):
+    """Test entity extraction with the fix on-fail policy."""
+    mocker.patch(
+        "guardrails.llm_providers.AsyncOpenAICallable",
+        new=MockAsyncOpenAICallable,
+    )
+
+    content = gd.docs_utils.read_pdf("docs/examples/data/chase_card_agreement.pdf")
+    guard = gd.Guard.from_rail_string(entity_extraction.RAIL_SPEC_WITH_FIX)
+    _, final_output = await guard(
+        llm_api=openai.Completion.acreate,
+        prompt_params={"document": content[:6000]},
+        num_reasks=1,
+    )
+
+    # Assertions are made on the guard state object.
+    assert final_output == entity_extraction.VALIDATED_OUTPUT_FIX
+
+    guard_history = guard.guard_state.most_recent_call.history
+
+    # Check that the guard state object has the correct number of re-asks.
+    assert len(guard_history) == 1
+
+    # For original prompt and output
+    assert guard_history[0].prompt == gd.Prompt(entity_extraction.COMPILED_PROMPT)
+    assert guard_history[0].output == entity_extraction.LLM_OUTPUT
+    assert guard_history[0].validated_output == entity_extraction.VALIDATED_OUTPUT_FIX
+
+
+@pytest.mark.asyncio
+async def test_entity_extraction_with_refrain(mocker):
+    """Test entity extraction with the refrain on-fail policy."""
+    mocker.patch(
+        "guardrails.llm_providers.AsyncOpenAICallable",
+        new=MockAsyncOpenAICallable,
+    )
+
+    content = gd.docs_utils.read_pdf("docs/examples/data/chase_card_agreement.pdf")
+    guard = gd.Guard.from_rail_string(entity_extraction.RAIL_SPEC_WITH_REFRAIN)
+    _, final_output = await guard(
+        llm_api=openai.Completion.acreate,
+        prompt_params={"document": content[:6000]},
+        num_reasks=1,
+    )
+    # Assertions are made on the guard state object.
+    assert final_output == entity_extraction.VALIDATED_OUTPUT_REFRAIN
+
+    guard_history = guard.guard_state.most_recent_call.history
+
+    # Check that the guard state object has the correct number of re-asks.
+    assert len(guard_history) == 1
+
+    # For original prompt and output
+    assert guard_history[0].prompt == gd.Prompt(entity_extraction.COMPILED_PROMPT)
+    assert guard_history[0].output == entity_extraction.LLM_OUTPUT
+    assert (
+        guard_history[0].validated_output == entity_extraction.VALIDATED_OUTPUT_REFRAIN
+    )
+
+
 @pytest.mark.asyncio
 async def test_rail_spec_output_parse(rail_spec, llm_output, validated_output):
     """Test that the rail_spec fixture is working."""

tests/integration_tests/test_guard.py

Lines changed: 25 additions & 1 deletion

@@ -399,7 +399,6 @@ def test_string_output(mocker):
         prompt_params={"ingredients": "tomato, cheese, sour cream"},
         num_reasks=1,
     )
-
     assert final_output == string.LLM_OUTPUT
 
     guard_history = guard.guard_state.most_recent_call.history

@@ -486,6 +485,31 @@ def test_skeleton_reask(mocker):
     )
 
 
+'''def test_json_output(mocker):
+    """Test list (JSON) output generation."""
+    mocker.patch(
+        "guardrails.llm_providers.openai_wrapper", new=openai_completion_create
+    )
+
+    guard = gd.Guard.from_rail_string(string.RAIL_SPEC_FOR_LIST)
+    _, final_output = guard(
+        llm_api=openai.Completion.create,
+        num_reasks=1,
+    )
+    assert final_output == string.LIST_LLM_OUTPUT
+
+    guard_history = guard.guard_state.most_recent_call.history
+
+    # Check that the guard state object has the correct number of re-asks.
+    assert len(guard_history) == 1
+
+    # For original prompt and output
+    #assert guard_history[0].prompt == gd.Prompt(string.COMPILED_PROMPT)
+    assert guard_history[0].output == string.LLM_OUTPUT
+
+'''
+
+
 @pytest.mark.parametrize(
     "rail,prompt,instructions,history,llm_api,expected_prompt,"
     "expected_instructions,expected_reask_prompt,expected_reask_instructions",

tests/unit_tests/test_assets/simple.rail

Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+<rail version="0.1">
+<output>
+    <string name="test_string" description="A string for testing." />
+</output>
+<instructions>
+
+You are a helpful bot, who answers only with valid JSON
+
+</instructions>
+
+<prompt>
+
+Extract a string from the text
+
+</prompt>
+</rail>

tests/unit_tests/test_guard.py

Lines changed: 9 additions & 0 deletions

@@ -152,3 +152,12 @@ class EmptyModel(BaseModel):
 def test_configure(guard: Guard, expected_num_reasks: int, config_num_reasks: int):
     guard.configure(config_num_reasks)
     assert guard.num_reasks == expected_num_reasks
+
+
+def test_guard_init_from_rail():
+    guard = Guard.from_rail("tests/unit_tests/test_assets/simple.rail")
+    assert (
+        guard.instructions.format().source.strip()
+        == "You are a helpful bot, who answers only with valid JSON"
+    )
+    assert guard.prompt.format().source.strip() == "Extract a string from the text"
