From cc3ed0c1d6d3e5eaa0b5cc921275fddd54592e0f Mon Sep 17 00:00:00 2001
From: Hao <120852460@qq.com>
Date: Mon, 27 Oct 2025 16:28:44 +0800
Subject: [PATCH 1/2] feat: use schema to structure mem_reader output

---
 src/memos/llms/openai.py                  |  3 +-
 src/memos/llms/vllm.py                    |  7 ++--
 src/memos/mem_reader/simple_struct.py     |  5 ++-
 src/memos/templates/mem_reader_prompts.py | 45 +++++++++++++++++++++++
 4 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/src/memos/llms/openai.py b/src/memos/llms/openai.py
index 698bc3265..7a3d5bea0 100644
--- a/src/memos/llms/openai.py
+++ b/src/memos/llms/openai.py
@@ -56,7 +56,7 @@ def clear_cache(cls):
         cls._instances.clear()
         logger.info("OpenAI LLM instance cache cleared")
 
-    def generate(self, messages: MessageList) -> str:
+    def generate(self, messages: MessageList, **kwargs) -> str:
         """Generate a response from OpenAI LLM."""
         response = self.client.chat.completions.create(
             model=self.config.model_name_or_path,
@@ -65,6 +65,7 @@ def generate(self, messages: MessageList) -> str:
             temperature=self.config.temperature,
             max_tokens=self.config.max_tokens,
             top_p=self.config.top_p,
+            **kwargs
         )
         logger.info(f"Response from OpenAI: {response.model_dump_json()}")
         response_content = response.choices[0].message.content
diff --git a/src/memos/llms/vllm.py b/src/memos/llms/vllm.py
index c3750bb4b..10b93d844 100644
--- a/src/memos/llms/vllm.py
+++ b/src/memos/llms/vllm.py
@@ -85,16 +85,16 @@ def build_vllm_kv_cache(self, messages: Any) -> str:
 
         return prompt
 
-    def generate(self, messages: list[MessageDict]) -> str:
+    def generate(self, messages: list[MessageDict], **kwargs) -> str:
         """
         Generate a response from the model.
         """
         if self.client:
-            return self._generate_with_api_client(messages)
+            return self._generate_with_api_client(messages, **kwargs)
         else:
             raise RuntimeError("API client is not available")
 
-    def _generate_with_api_client(self, messages: list[MessageDict]) -> str:
+    def _generate_with_api_client(self, messages: list[MessageDict], **kwargs) -> str:
         """
         Generate response using vLLM API client.
""" @@ -106,6 +106,7 @@ def _generate_with_api_client(self, messages: list[MessageDict]) -> str: "max_tokens": int(getattr(self.config, "max_tokens", 1024)), "top_p": float(getattr(self.config, "top_p", 0.9)), "extra_body": {"chat_template_kwargs": {"enable_thinking": False}}, + **kwargs } response = self.client.chat.completions.create(**completion_kwargs) diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py index 9f5eb9832..4f12e17ec 100644 --- a/src/memos/mem_reader/simple_struct.py +++ b/src/memos/mem_reader/simple_struct.py @@ -27,6 +27,7 @@ SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH, SIMPLE_STRUCT_MEM_READER_PROMPT, SIMPLE_STRUCT_MEM_READER_PROMPT_ZH, + reader_output_schema ) from memos.utils import timed @@ -200,7 +201,9 @@ def _get_llm_response(self, mem_str: str) -> dict: prompt = prompt.replace(examples, "") messages = [{"role": "user", "content": prompt}] try: - response_text = self.llm.generate(messages) + response_text = self.llm.generate(messages, + response_format={"type": "json_object", + "schema": reader_output_schema}) response_json = self.parse_json_result(response_text) except Exception as e: logger.error(f"[LLM] Exception during chat generation: {e}") diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py index 15672f8d8..19135656c 100644 --- a/src/memos/templates/mem_reader_prompts.py +++ b/src/memos/templates/mem_reader_prompts.py @@ -417,3 +417,48 @@ } """ + +reader_output_schema = { + "$schema": "https://json-schema.org/draft/2020-12/schema", + "type": "object", + "properties": { + "memory list": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "type": "string", + "description": "A brief title or identifier for the memory." + }, + "memory_type": { + "type": "string", + "enum": ["LongTermMemory", "ShortTermMemory", "WorkingMemory"], + "description": "The type of memory, expected to be 'LongTermMemory' in this context." + }, + "value": { + "type": "string", + "description": "Detailed description of the memory, including viewpoint, time, and content." + }, + "tags": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of keywords or categories associated with the memory." + } + }, + "required": ["key", "memory_type", "value", "tags"], + "additionalProperties": False + }, + "description": "List of memory entries." + }, + "summary": { + "type": "string", + "description": "A synthesized summary of the overall situation based on all memories." + } + }, + "required": ["memory list", "summary"], + "additionalProperties": False, + "description": "Structured output containing a list of memories and a summary." 
+}
\ No newline at end of file

From 211d54c8a24d1ab24f76e0201da4eb042ba7c9c7 Mon Sep 17 00:00:00 2001
From: Hao <120852460@qq.com>
Date: Mon, 27 Oct 2025 16:31:29 +0800
Subject: [PATCH 2/2] format code

---
 src/memos/llms/openai.py                  |  2 +-
 src/memos/llms/vllm.py                    |  2 +-
 src/memos/mem_reader/simple_struct.py     |  8 +--
 src/memos/templates/mem_reader_prompts.py | 74 +++++++++++------------
 4 files changed, 42 insertions(+), 44 deletions(-)

diff --git a/src/memos/llms/openai.py b/src/memos/llms/openai.py
index 7a3d5bea0..1a161dbc2 100644
--- a/src/memos/llms/openai.py
+++ b/src/memos/llms/openai.py
@@ -65,7 +65,7 @@ def generate(self, messages: MessageList, **kwargs) -> str:
             temperature=self.config.temperature,
             max_tokens=self.config.max_tokens,
             top_p=self.config.top_p,
-            **kwargs
+            **kwargs,
         )
         logger.info(f"Response from OpenAI: {response.model_dump_json()}")
         response_content = response.choices[0].message.content
diff --git a/src/memos/llms/vllm.py b/src/memos/llms/vllm.py
index 10b93d844..1bea2879d 100644
--- a/src/memos/llms/vllm.py
+++ b/src/memos/llms/vllm.py
@@ -106,7 +106,7 @@ def _generate_with_api_client(self, messages: list[MessageDict], **kwargs) -> st
             "max_tokens": int(getattr(self.config, "max_tokens", 1024)),
             "top_p": float(getattr(self.config, "top_p", 0.9)),
             "extra_body": {"chat_template_kwargs": {"enable_thinking": False}},
-            **kwargs
+            **kwargs,
         }
 
         response = self.client.chat.completions.create(**completion_kwargs)
diff --git a/src/memos/mem_reader/simple_struct.py b/src/memos/mem_reader/simple_struct.py
index 4f12e17ec..14612aaa3 100644
--- a/src/memos/mem_reader/simple_struct.py
+++ b/src/memos/mem_reader/simple_struct.py
@@ -27,7 +27,7 @@
     SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
     SIMPLE_STRUCT_MEM_READER_PROMPT,
     SIMPLE_STRUCT_MEM_READER_PROMPT_ZH,
-    reader_output_schema
+    reader_output_schema,
 )
 from memos.utils import timed
 
@@ -201,9 +201,9 @@ def _get_llm_response(self, mem_str: str) -> dict:
             prompt = prompt.replace(examples, "")
         messages = [{"role": "user", "content": prompt}]
         try:
-            response_text = self.llm.generate(messages,
-                                              response_format={"type": "json_object",
-                                                               "schema": reader_output_schema})
+            response_text = self.llm.generate(
+                messages, response_format={"type": "json_object", "schema": reader_output_schema}
+            )
             response_json = self.parse_json_result(response_text)
         except Exception as e:
             logger.error(f"[LLM] Exception during chat generation: {e}")
diff --git a/src/memos/templates/mem_reader_prompts.py b/src/memos/templates/mem_reader_prompts.py
index 19135656c..cd9679802 100644
--- a/src/memos/templates/mem_reader_prompts.py
+++ b/src/memos/templates/mem_reader_prompts.py
@@ -419,46 +419,44 @@
 """
 
 reader_output_schema = {
-  "$schema": "https://json-schema.org/draft/2020-12/schema",
-  "type": "object",
-  "properties": {
-    "memory list": {
-      "type": "array",
-      "items": {
-        "type": "object",
-        "properties": {
-          "key": {
-            "type": "string",
-            "description": "A brief title or identifier for the memory."
-          },
-          "memory_type": {
-            "type": "string",
-            "enum": ["LongTermMemory", "ShortTermMemory", "WorkingMemory"],
-            "description": "The type of memory, expected to be 'LongTermMemory' in this context."
-          },
-          "value": {
-            "type": "string",
-            "description": "Detailed description of the memory, including viewpoint, time, and content."
-          },
-          "tags": {
+    "$schema": "https://json-schema.org/draft/2020-12/schema",
+    "type": "object",
+    "properties": {
+        "memory list": {
             "type": "array",
             "items": {
-              "type": "string"
+                "type": "object",
+                "properties": {
+                    "key": {
+                        "type": "string",
+                        "description": "A brief title or identifier for the memory.",
+                    },
+                    "memory_type": {
+                        "type": "string",
+                        "enum": ["LongTermMemory", "ShortTermMemory", "WorkingMemory"],
+                        "description": "The type of memory, expected to be 'LongTermMemory' in this context.",
+                    },
+                    "value": {
+                        "type": "string",
+                        "description": "Detailed description of the memory, including viewpoint, time, and content.",
+                    },
+                    "tags": {
+                        "type": "array",
+                        "items": {"type": "string"},
+                        "description": "List of keywords or categories associated with the memory.",
+                    },
+                },
+                "required": ["key", "memory_type", "value", "tags"],
+                "additionalProperties": False,
             },
-            "description": "List of keywords or categories associated with the memory."
-          }
+            "description": "List of memory entries.",
+        },
+        "summary": {
+            "type": "string",
+            "description": "A synthesized summary of the overall situation based on all memories.",
         },
-        "required": ["key", "memory_type", "value", "tags"],
-        "additionalProperties": False
-      },
-      "description": "List of memory entries."
     },
-    "summary": {
-      "type": "string",
-      "description": "A synthesized summary of the overall situation based on all memories."
-    }
-  },
-  "required": ["memory list", "summary"],
-  "additionalProperties": False,
-  "description": "Structured output containing a list of memories and a summary."
-}
\ No newline at end of file
+    "required": ["memory list", "summary"],
+    "additionalProperties": False,
+    "description": "Structured output containing a list of memories and a summary.",
+}
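
A usage note, not part of the patches above: because `response_format` is forwarded verbatim through `**kwargs` to `chat.completions.create`, whether the schema is enforced server-side depends on the backend, and schema handling for `response_format` differs across OpenAI-compatible servers. Below is a minimal client-side sketch that validates a reply against `reader_output_schema` using the third-party `jsonschema` package; the response payload is made up for illustration.

import json

from jsonschema import ValidationError, validate  # pip install jsonschema

# Abridged copy of reader_output_schema from the patch above. Note that
# the "memory list" property name contains a space, so the parsed result
# must be read with bracket access, e.g. parsed["memory list"].
reader_output_schema = {
    "type": "object",
    "properties": {
        "memory list": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "key": {"type": "string"},
                    "memory_type": {
                        "type": "string",
                        "enum": ["LongTermMemory", "ShortTermMemory", "WorkingMemory"],
                    },
                    "value": {"type": "string"},
                    "tags": {"type": "array", "items": {"type": "string"}},
                },
                "required": ["key", "memory_type", "value", "tags"],
                "additionalProperties": False,
            },
        },
        "summary": {"type": "string"},
    },
    "required": ["memory list", "summary"],
    "additionalProperties": False,
}

# Hypothetical response_text; in simple_struct.py this is what
# self.llm.generate(...) returns before parse_json_result is applied.
response_text = json.dumps(
    {
        "memory list": [
            {
                "key": "Favorite food",
                "memory_type": "LongTermMemory",
                "value": "On 27 October 2025, the user mentioned they love hotpot.",
                "tags": ["food", "preference"],
            }
        ],
        "summary": "The user shared a long-term food preference.",
    }
)

try:
    validate(instance=json.loads(response_text), schema=reader_output_schema)
    print("response conforms to reader_output_schema")
except ValidationError as err:
    print(f"schema violation: {err.message}")

Validating on the client side keeps `_get_llm_response` robust even against a backend that silently ignores the "schema" key.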