
Commit 4fcd842

Standardized prompt templates and system prompt across the repo
1 parent bf98fb1 commit 4fcd842

File tree: 9 files changed, +71 -38 lines changed


README.md

Lines changed: 3 additions & 4 deletions
@@ -927,15 +927,14 @@ will return much faster than the first query and we'll be certain the authors ma
 | `parsing.chunking_algorithm` | `ChunkingOptions.SIMPLE_OVERLAP` | Algorithm for chunking. |
 | `parsing.doc_filters` | `None` | Optional filters for allowed documents. |
 | `parsing.use_human_readable_clinical_trials` | `False` | Parse clinical trial JSONs into readable text. |
-| `prompt.summary` | `summary_prompt` | Template for summarizing text, must contain variables matching `summary_prompt`. |
+| `prompt.summary` | `summary_prompt` | User prompt template(s) to use when generating contextual summaries. |
 | `prompt.qa` | `qa_prompt` | Template for QA, must contain variables matching `qa_prompt`. |
 | `prompt.select` | `select_paper_prompt` | Template for selecting papers, must contain variables matching `select_paper_prompt`. |
 | `prompt.pre` | `None` | Optional pre-prompt templated with just the original question to append information before a qa prompt. |
 | `prompt.post` | `None` | Optional post-processing prompt that can access PQASession fields. |
-| `prompt.system` | `default_system_prompt` | System prompt for the model. |
+| `prompt.system` | `default_system_prompt` | System prompt to use when generating contextual summaries and answers. |
 | `prompt.use_json` | `True` | Whether to use JSON formatting. |
-| `prompt.summary_json` | `summary_json_prompt` | JSON-specific summary prompt. |
-| `prompt.summary_json_system` | `summary_json_system_prompt` | System prompt for JSON summaries. |
+| `prompt.summary_json` | `summary_json_prompt` | JSON-specific user prompt template(s) to use when generating contextual summaries. |
 | `prompt.context_outer` | `CONTEXT_OUTER_PROMPT` | Prompt for how to format all contexts in generate answer. |
 | `prompt.context_inner` | `CONTEXT_INNER_PROMPT` | Prompt for how to format a single context in generate answer. Must contain 'name' and 'text' variables. |
 | `agent.agent_llm` | `"gpt-4o-2024-11-20"` | Model to use for agent making tool selections. |
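
As context for the renamed settings, here is a minimal sketch of overriding them from Python; it assumes `Settings` imports from `paperqa` and mirrors the tutorial change in this commit:

```python
# Minimal sketch (assumes these imports resolve as in the settings tutorial).
from paperqa import Settings
from paperqa.prompts import (
    include_text_prompt_template,
    summary_json_system_prompt,
)

settings = Settings()
settings.prompts.use_json = True
# `prompt.summary_json` now accepts a list of user prompt templates; the
# final entry is the one formatted with {citation}, {text}, and {question}.
settings.prompts.summary_json = [
    summary_json_system_prompt,
    include_text_prompt_template,
]
```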

docs/tutorials/settings_tutorial.ipynb

Lines changed: 4 additions & 3 deletions
@@ -129,10 +129,10 @@
     "    default_system_prompt,\n",
     "    env_reset_prompt,\n",
     "    env_system_prompt,\n",
+    "    include_text_prompt_template,\n",
     "    qa_prompt,\n",
     "    select_paper_prompt,\n",
     "    structured_citation_prompt,\n",
-    "    summary_json_prompt,\n",
     "    summary_json_system_prompt,\n",
     "    summary_prompt,\n",
     ")\n",
@@ -201,8 +201,9 @@
     "        post=None,\n",
     "        system=default_system_prompt,\n",
     "        use_json=True,\n",
-    "        summary_json=summary_json_prompt,\n",
-    "        summary_json_system=summary_json_system_prompt,\n",
+    "        summary_json=[\n",
+    "            summary_json_system_prompt, include_text_prompt_template\n",
+    "        ],\n",
     "        context_outer=CONTEXT_OUTER_PROMPT,\n",
     "        context_inner=CONTEXT_INNER_PROMPT,\n",
     "    ),\n",

docs/tutorials/settings_tutorial.md

Lines changed: 2 additions & 3 deletions
@@ -102,10 +102,10 @@ from paperqa.prompts import (
     default_system_prompt,
     env_reset_prompt,
     env_system_prompt,
+    include_text_prompt_template,
     qa_prompt,
     select_paper_prompt,
     structured_citation_prompt,
-    summary_json_prompt,
     summary_json_system_prompt,
     summary_prompt,
 )
@@ -174,8 +174,7 @@ settings = Settings(
         post=None,
         system=default_system_prompt,
         use_json=True,
-        summary_json=summary_json_prompt,
-        summary_json_system=summary_json_system_prompt,
+        summary_json=[summary_json_system_prompt, include_text_prompt_template],
         context_outer=CONTEXT_OUTER_PROMPT,
         context_inner=CONTEXT_INNER_PROMPT,
     ),

src/paperqa/configs/contracrow.json

Lines changed: 4 additions & 2 deletions
@@ -36,8 +36,10 @@
     "post": null,
     "system": "Answer in a direct and concise tone. Your audience is an expert, so be highly specific. If there are ambiguous terms or acronyms, first define them.",
     "use_json": true,
-    "summary_json": "Excerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}\n\n",
-    "summary_json_system": "Provide a summary of the relevant information that could help determine if a claim is contradicted or supported by this excerpt. The excerpt may be irrelevant. Do not directly answer if it is contradicted - only summarize relevant information. Respond with the following JSON format:\n\n{{\n \"summary\": \"...\",\n \"relevance_score\": \"...\"\n}}\n\nwhere `summary` is relevant information from excerpt ({summary_length}) and `relevance_score` is the relevance of `summary` to support or contradict the claim (integer out of 10). If any string entry in the JSON has newlines, be sure to escape them. "
+    "summary_json": [
+      "Provide a summary of the relevant information that could help determine if a claim is contradicted or supported by this excerpt. The excerpt may be irrelevant. Do not directly answer if it is contradicted - only summarize relevant information. Respond with the following JSON format:\n\n{{\n \"summary\": \"...\",\n \"relevance_score\": \"...\"\n}}\n\nwhere `summary` is relevant information from excerpt ({summary_length}) and `relevance_score` is the relevance of `summary` to support or contradict the claim (integer out of 10). If any string entry in the JSON has newlines, be sure to escape them.",
      "Excerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}"
+    ]
   },
   "agent": {
     "agent_llm": "gpt-4o-2024-08-06",

src/paperqa/configs/wikicrow.json

Lines changed: 4 additions & 2 deletions
@@ -36,8 +36,10 @@
     "post": null,
     "system": "Answer in a direct and concise tone.",
     "use_json": true,
-    "summary_json": "Excerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}\n\n",
-    "summary_json_system": "Provide a summary of the relevant information that could help answer the question based on the excerpt. The excerpt may be irrelevant. Do not directly answer the question - only summarize relevant information. \n\nRespond with the following JSON format:\n\n{{\n \"summary\": \"...\",\n \"relevance_score\": \"...\",\n \"gene_name: \"...\"\n}}\n\nwhere `summary` is relevant information from text - {summary_length}, \n`gene_name` is the gene discussed in the excerpt (may be different than query), and `relevance_score` is the relevance of `summary` to answer the question (integer out of 10)"
+    "summary_json": [
+      "Provide a summary of the relevant information that could help answer the question based on the excerpt. The excerpt may be irrelevant. Do not directly answer the question - only summarize relevant information. \n\nRespond with the following JSON format:\n\n{{\n \"summary\": \"...\",\n \"relevance_score\": \"...\",\n \"gene_name: \"...\"\n}}\n\nwhere `summary` is relevant information from text - {summary_length}, \n`gene_name` is the gene discussed in the excerpt (may be different than query), and `relevance_score` is the relevance of `summary` to answer the question (integer out of 10)",
+      "Excerpt from {citation}\n\n----\n\n{text}\n\n----\n\nQuestion: {question}"
+    ]
   },
   "agent": {
     "agent_llm": "gpt-4-turbo-2024-04-09",

src/paperqa/core.py

Lines changed: 23 additions & 8 deletions
@@ -136,7 +136,7 @@ async def _map_fxn_summary( # noqa: PLR0912
     text: Text,
     question: str,
     summary_llm_model: LLMModel | None,
-    prompt_templates: tuple[str, str] | None,
+    prompt_templates: tuple[str | list[str], str] | None,
     extra_prompt_data: dict[str, str] | None = None,
     parser: Callable[[str], dict[str, Any]] | None = None,
     callbacks: Sequence[Callable[[str], None]] | None = None,
@@ -154,8 +154,9 @@ async def _map_fxn_summary( # noqa: PLR0912
         text: The text to parse.
         question: The question to use for summarization.
         summary_llm_model: The LLM model to use for generating summaries.
-        prompt_templates: Optional two-elements tuple containing templates for the user and system prompts.
-            prompt_templates = (user_prompt_template, system_prompt_template)
+        prompt_templates: Optional two-tuple containing
+            the user prompt template(s) and a system prompt.
+            prompt_templates = (user_prompt_template(s), system_prompt_template)
         extra_prompt_data: Optional extra data to pass to the prompt template.
         parser: Optional parser function to parse LLM output into structured data.
             Should return dict with at least 'summary' field.
@@ -202,13 +203,27 @@ async def _map_fxn_summary( # noqa: PLR0912
             else cleaned_text
         ),
     } | (extra_prompt_data or {})
-    message_prompt, system_prompt = (pt.format(**data) for pt in prompt_templates)
+    user_msg_prompts: list[str] = (
+        [prompt_templates[0].format(**data)]
+        if isinstance(prompt_templates[0], str)
+        else [pt.format(**data) for pt in prompt_templates[0]]
+    )
+    system_msg = Message(role="system", content=prompt_templates[1])
+    prepend_msgs = (
+        [
+            system_msg,
+            *(Message(content=m) for m in user_msg_prompts[:-1]),
+        ]
+        if len(user_msg_prompts) > 1
+        else [system_msg]
+    )
+    msg_with_media_prompt = user_msg_prompts[-1]
     try:
         llm_result = await summary_llm_model.call_single(
             messages=[
-                Message(role="system", content=system_prompt),
+                *prepend_msgs,
                 Message.create_message(
-                    text=message_prompt,
+                    text=msg_with_media_prompt,
                     images=(
                         [i.to_image_url() for i in text.media]
                         if text.media
@@ -231,8 +246,8 @@ async def _map_fxn_summary( # noqa: PLR0912
             )
         llm_result = await summary_llm_model.call_single(
             messages=[
-                Message(role="system", content=system_prompt),
-                Message(content=message_prompt),
+                *prepend_msgs,
+                Message(content=msg_with_media_prompt),
                 *append_msgs,
             ],
             callbacks=callbacks,
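
The net effect: a single template still produces the old system-plus-user layout, while a list produces a system message, any leading user messages, and a final user message that carries the excerpt and any media. A simplified, self-contained sketch of that normalization (this `Message` is a stand-in for the real class, and the system template is intentionally left unformatted, as above):

```python
from dataclasses import dataclass


@dataclass
class Message:
    content: str
    role: str = "user"


def build_messages(
    prompt_templates: tuple[str | list[str], str], data: dict[str, str]
) -> list[Message]:
    user_templates, system_template = prompt_templates
    # A bare string is normalized to a one-element list of user templates.
    if isinstance(user_templates, str):
        user_templates = [user_templates]
    user_prompts = [pt.format(**data) for pt in user_templates]
    # All but the last user prompt are prepended after the system message;
    # the last is reserved for the excerpt (and any attached media).
    return [
        Message(content=system_template, role="system"),
        *(Message(content=p) for p in user_prompts[:-1]),
        Message(content=user_prompts[-1]),
    ]


msgs = build_messages(
    (["Summarize only relevant information.", "Excerpt: {text}"], "Be concise."),
    {"text": "lorem ipsum"},
)
assert [m.role for m in msgs] == ["system", "user", "user"]
```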

src/paperqa/docs.py

Lines changed: 7 additions & 11 deletions
@@ -643,18 +643,14 @@ async def aget_evidence(
             else matches
         )

-        prompt_templates = None
         if not answer_config.evidence_skip_summary:
-            if prompt_config.use_json:
-                prompt_templates = (
-                    prompt_config.summary_json,
-                    prompt_config.summary_json_system,
-                )
-            else:
-                prompt_templates = (
-                    prompt_config.summary,
-                    prompt_config.system,
-                )
+            prompt_templates: tuple[str | list[str], str] | None = (
+                prompt_config.summary_json
+                if prompt_config.use_json
+                else prompt_config.summary
+            ), prompt_config.system
+        else:
+            prompt_templates = None

         with set_llm_session_ids(session.id):
             results = await gather_with_concurrency(

src/paperqa/settings.py

Lines changed: 22 additions & 4 deletions
@@ -371,7 +371,13 @@ class PromptSettings(BaseModel):
     # SEE: https://nwtc.libguides.com/citations/MLA#s-lg-box-707489
     EXAMPLE_CITATION: ClassVar[str] = "(pqac-0f650d59)"

-    summary: str = summary_prompt
+    summary: str | list[str] = Field(
+        default=summary_prompt,
+        description=(
+            "User prompt template(s) to use when generating contextual summaries."
+            " Must contain variables matching the default argument `summary_prompt`."
+        ),
+    )
     qa: str = qa_prompt
     answer_iteration_prompt: str | None = Field(
         default=answer_iteration_prompt_template,
@@ -392,13 +398,25 @@ class PromptSettings(BaseModel):
         ),
     )
     post: str | None = None
-    system: str = default_system_prompt
+    system: str = Field(
+        default=default_system_prompt,
+        description="System prompt to use when generating contextual summaries and answers.",
+    )
     use_json: bool = True
     # Not thrilled about this model,
     # but need to split out the system/summary
     # to get JSON
-    summary_json: str = include_text_prompt_template
-    summary_json_system: str = summary_json_system_prompt
+    summary_json: str | list[str] = Field(
+        default_factory=lambda: [
+            summary_json_system_prompt,
+            include_text_prompt_template,
+        ],
+        description=(
+            "JSON-specific user prompt template(s) to use"
+            " when generating contextual summaries."
+            " Must contain variables matching the default argument `summary_prompt`."
+        ),
+    )
     context_outer: str = Field(
         default=CONTEXT_OUTER_PROMPT,
         description="Prompt for how to format all contexts in generate answer.",

tests/test_paperqa.py

Lines changed: 2 additions & 1 deletion
@@ -528,7 +528,8 @@ async def acompletion_that_breaks_first_context(*args, **kwargs):
 async def test_json_evidence(docs_fixture: Docs) -> None:
     settings = Settings.from_name("fast")
     settings.prompts.use_json = True
-    settings.prompts.summary_json_system = (
+    assert isinstance(settings.prompts.summary_json, list)
+    settings.prompts.summary_json[0] = (
         "Provide a summary of the excerpt that could help answer the question based on"
         " the excerpt. The excerpt may be irrelevant. Do not directly answer the"
         " question - only summarize relevant information. Respond with the following"
