From 385ee1f946d59e593ac99bb8fa72c584d88c5411 Mon Sep 17 00:00:00 2001 From: Lriver Date: Wed, 25 Jun 2025 14:20:26 +0800 Subject: [PATCH 01/12] Add meta prompt for auto-generation --- .../config/models/base_prompt_config.py | 4 ++ .../src/hugegraph_llm/config/prompt_config.py | 38 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py index 563845d33..2330e135d 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py +++ b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py @@ -78,6 +78,8 @@ def save_to_yaml(self): "\n".join([f" {line}" for line in self.keywords_extract_prompt.splitlines()]) ) indented_doc_input_text = "\n".join([f" {line}" for line in self.doc_input_text.splitlines()]) + indented_generate_extract_prompt = "\n".join([f" {line}" for line in self.generate_extract_prompt_template.splitlines()]) + # This can be extended to add storage fields according to the data needs to be stored yaml_content = f"""graph_schema: | @@ -107,6 +109,8 @@ def save_to_yaml(self): doc_input_text: | {indented_doc_input_text} +generate_extract_prompt_template: | +{indented_generate_extract_prompt} """ with open(yaml_file_path, "w", encoding="utf-8") as file: file.write(yaml_content) diff --git a/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py b/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py index 01b92b7ef..547cda915 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py +++ b/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py @@ -386,4 +386,42 @@ class PromptConfig(BasePromptConfig): doc_input_text_CN: str = """介绍一下Sarah,她是一位30岁的律师,还有她的室友James,他们从2010年开始一起合租。James是一名记者, 职业道路也很出色。另外,Sarah拥有一个个人网站www.sarahsplace.com,而James也经营着自己的网页,不过这里没有提到具体的网址。这两个人, Sarah和James,不仅建立起了深厚的室友情谊,还各自在网络上开辟了自己的一片天地,展示着他们各自丰富多彩的兴趣和经历。 +""" + + 
generate_extract_prompt_template: str = """## Your Role +You are an expert in crafting high-quality prompts for Large Language Models (LLMs), specializing in extracting graph structures from text. + +## Core Task +Your goal is to generate a new, tailored "Graph Extract Prompt Header" based on user requirements and a provided example. This new prompt will be used to guide another LLM. + +## Input Information +1. **User's Source Text**: A sample of the text for extraction. +2. **User's Desired Scenario/Direction**: A description of the user's goal. +3. **A High-Quality Few-shot Example**: A complete, working example including a sample text and the corresponding full "Graph Extract Prompt". + +## Generation Rules +1. **Analyze**: Carefully analyze the user's source text and desired scenario. +2. **Adapt**: From the provided Few-shot Example's "Graph Extract Prompt", you must learn its structure, rules, and especially the format of the `graph schema example` and `Output example` sections. +3. **Create New Content**: + - **Infer a New Schema**: Based on the user's scenario and text, create a new `graph schema example` block. + - **Synthesize a New Output**: Based on the user's text and your new schema, create a new `Output example` block. +4. **Construct the Final Prompt**: Combine the general instructions from the Few-shot Example with your newly created `graph schema example` and `Output example` to form a complete, new "Graph Extract Prompt Header". 
+ +--- +## Provided Few-shot Example (For Your Reference) +### Example Text: +{few_shot_text} + +### Corresponding "Graph Extract Prompt": +{few_shot_prompt} +--- + +## User's Request (Generate a new prompt based on this) +### User's Source Text: +{user_text} + +### User's Desired Scenario/Direction: +{user_scenario} + +## Your Generated "Graph Extract Prompt Header": """ From 41e9e87e217f84de415b1ddee96303388be9f26c Mon Sep 17 00:00:00 2001 From: Lriver Date: Wed, 25 Jun 2025 14:22:06 +0800 Subject: [PATCH 02/12] Add few-shot examples for prompt generation --- .../prompt_examples/prompt_examples.json | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 hugegraph-llm/src/hugegraph_llm/resources/prompt_examples/prompt_examples.json diff --git a/hugegraph-llm/src/hugegraph_llm/resources/prompt_examples/prompt_examples.json b/hugegraph-llm/src/hugegraph_llm/resources/prompt_examples/prompt_examples.json new file mode 100644 index 000000000..f3bd33c37 --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/resources/prompt_examples/prompt_examples.json @@ -0,0 +1,26 @@ +[ + { + "name": "Official Person-Relationship Extraction", + "description": "A standard template for extracting Person and Webpage entities, along with their relationships (Roommate, Owns), from descriptive text.", + "text": "Meet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist. 
Additionally, Sarah is the proud owner of the website www.sarahsplace.com.", + "prompt": "## Main Task\nGiven the following graph schema and a piece of text, your task is to analyze the text and extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.\n\n## Basic Rules:\n### Schema Format:\nGraph Schema:\n- \"vertices\": [List of vertex labels and their properties]\n- \"edges\": [List of edge labels, their source and target vertex labels, and properties]\n\n### Content Rule:\nPlease read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema.\nYou are not allowed to modify the schema contraints. Your task is to format the provided information into the required schema, without missing any keyword.\nFor each piece of information that matches a vertex or edge, format it strictly according to the following JSON structures:\n\n#### Vertex Format:\n{\"id\":\"vertexLabelID:entityName\",\"label\":\"vertexLabel\",\"type\":\"vertex\",\"properties\":{\"propertyName\":\"propertyValue\", ...}}\n\n#### Edge Format:\n{\"id\":\"vertexlabelID:pk1!pk2!pk3\", \"label\":\"edgeLabel\",\"type\":\"edge\",\"outV\":\"sourceVertexId\",\"outVLabel\":\"sourceVertexLabel\",\"inV\":\"targetVertexId\",\"inVLabel\":\"targetVertexLabel\",\"properties\":{\"propertyName\":\"propertyValue\",...}}\n\nStrictly follow these rules:\n1. Don't extract property fields or labels that doesn't exist in the given schema. Do not generate new information.\n2. Ensure the extracted property set in the same type as the given schema (like 'age' should be a number, 'select' should be a boolean).\n3. If there are multiple primary keys, the strategy for generating VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator). This id must be generated ONLY if there are multiple primary keys. 
If there is only one primary key, the strategy for generating VID is: int (sequencially increasing).\n4. Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema.\n5. Translate the schema fields into Chinese if the given text input is Chinese (Optional)\n\n## Example:\n### Input example:\n#### text:\nMeet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist.\n\n#### graph schema example:\n{\"vertices\":[{\"vertex_label\":\"person\",\"properties\":[\"name\",\"age\",\"occupation\"]}], \"edges\":[{\"edge_label\":\"roommate\", \"source_vertex_label\":\"person\",\"target_vertex_label\":\"person\",\"properties\":[\"date\"]}]}\n\n### Output example:\n{\"vertices\":[{\"id\":\"1:Sarah\",\"label\":\"person\",\"type\":\"vertex\",\"properties\":{\"name\":\"Sarah\",\"age\":30,\"occupation\":\"attorney\"}},{\"id\":\"1:James\",\"label\":\"person\",\"type\":\"vertex\",\"properties\":{\"name\":\"James\",\"occupation\":\"journalist\"}}], \"edges\":[{\"label\":\"roommate\",\"type\":\"edge\",\"outV\":\"1:Sarah\",\"outVLabel\":\"person\",\"inV\":\"1:James\",\"inVLabel\":\"person\",\"properties\":{\"date\":\"2010\"}}]}" + }, + { + "name": "Traffic Accident Element Extraction", + "description": "Extracts key elements from a traffic accident report, including persons involved, vehicles, and responsibilities.", + "text": "On March 15, 2024, John Smith, driving a red Porsche with license plate NY-88888, collided with a scooter ridden by Mike Lee at the intersection of People's Road and Liberation Road. The collision resulted in a fracture in Mike Lee's right leg. 
The traffic police determined that John Smith was fully responsible for running a red light.", + "prompt": "## Main Task\nGiven the following graph schema and a piece of text about a traffic accident, your task is to extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.\n\n## Basic Rules:\n### Schema Format:\nGraph Schema:\n- \"vertices\": [List of vertex labels and their properties]\n- \"edges\": [List of edge labels, their source and target vertex labels, and properties]\n\n### Content Rule:\nPlease read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema.\nYou are not allowed to modify the schema contraints. Your task is to format the provided information into the required schema, without missing any keyword.\nFor each piece of information that matches a vertex or edge, format it strictly according to the following JSON structures:\n\n#### Vertex Format:\n{\"id\":\"vertexLabelID:entityName\",\"label\":\"vertexLabel\",\"type\":\"vertex\",\"properties\":{\"propertyName\":\"propertyValue\", ...}}\n\n#### Edge Format:\n{\"id\":\"vertexlabelID:pk1!pk2!pk3\", \"label\":\"edgeLabel\",\"type\":\"edge\",\"outV\":\"sourceVertexId\",\"outVLabel\":\"sourceVertexLabel\",\"inV\":\"targetVertexId\",\"inVLabel\":\"targetVertexLabel\",\"properties\":{\"propertyName\":\"propertyValue\",...}}\n\nStrictly follow these rules:\n1. Don't extract property fields or labels that doesn't exist in the given schema. Do not generate new information.\n2. Ensure the extracted property set in the same type as the given schema (like 'age' should be a number, 'select' should be a boolean).\n3. 
Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema.\n\n## Example:\n### Input example:\n#### text:\nOn March 15, 2024, John Smith, driving a red Porsche with license plate NY-88888, collided with a scooter ridden by Mike Lee at the intersection of People's Road and Liberation Road. The collision resulted in a fracture in Mike Lee's right leg. The traffic police determined that John Smith was fully responsible for running a red light.\n\n#### graph schema example:\n{\"vertices\":[{\"vertex_label\":\"Person\",\"properties\":[\"name\",\"injury_level\"]},{\"vertex_label\":\"Vehicle\",\"properties\":[\"license_plate\",\"type\",\"color\"]},{\"vertex_label\":\"Accident\",\"properties\":[\"date\",\"location\",\"responsible_party\"]}], \"edges\":[{\"edge_label\":\"drives\",\"source_label\":\"Person\",\"target_label\":\"Vehicle\"},{\"edge_label\":\"participated_in\",\"source_label\":\"Person\",\"target_label\":\"Accident\"}]}\n\n### Output example:\n{\"vertices\":[{\"id\":\"1:John Smith\",\"label\":\"Person\",\"type\":\"vertex\",\"properties\":{\"name\":\"John Smith\"}},{\"id\":\"1:Mike Lee\",\"label\":\"Person\",\"type\":\"vertex\",\"properties\":{\"name\":\"Mike Lee\",\"injury_level\":\"right leg fracture\"}},{\"id\":\"2:NY-88888\",\"label\":\"Vehicle\",\"type\":\"vertex\",\"properties\":{\"license_plate\":\"NY-88888\",\"type\":\"Porsche\",\"color\":\"red\"}}],\"edges\":[{\"label\":\"drives\",\"type\":\"edge\",\"outV\":\"1:John Smith\",\"outVLabel\":\"Person\",\"inV\":\"2:NY-88888\",\"inVLabel\":\"Vehicle\",\"properties\":{}}]}" + }, + { + "name": "Financial Event Extraction", + "description": "Extracts key financial information such as companies, acquisition events, and amounts from financial news.", + "text": "Tech giant Company A announced yesterday that it will fully acquire startup Company B, which operates in the artificial intelligence sector, for a price of $2 billion. 
The acquisition is expected to be completed by the end of the year.", + "prompt": "## Main Task\nGiven the following graph schema and a piece of financial news, your task is to extract information about corporate mergers and acquisitions.\n\n## Basic Rules:\n### Schema Format:\nGraph Schema:\n- \"vertices\": [List of vertex labels and their properties]\n- \"edges\": [List of edge labels, their source and target vertex labels, and properties]\n\n### Content Rule:\nPlease read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema.\nYou are not allowed to modify the schema contraints. Your task is to format the provided information into the required schema, without missing any keyword.\nFor each piece of information that matches a vertex or edge, format it strictly according to the following JSON structures:\n\n#### Vertex Format:\n{\"id\":\"vertexLabelID:entityName\",\"label\":\"vertexLabel\",\"type\":\"vertex\",\"properties\":{\"propertyName\":\"propertyValue\", ...}}\n\n#### Edge Format:\n{\"id\":\"vertexlabelID:pk1!pk2!pk3\", \"label\":\"edgeLabel\",\"type\":\"edge\",\"outV\":\"sourceVertexId\",\"outVLabel\":\"sourceVertexLabel\",\"inV\":\"targetVertexId\",\"inVLabel\":\"targetVertexLabel\",\"properties\":{\"propertyName\":\"propertyValue\",...}}\n\nStrictly follow these rules:\n1. Don't extract property fields or labels that doesn't exist in the given schema. Do not generate new information.\n2. Ensure the extracted property set in the same type as the given schema (like 'age' should be a number, 'select' should be a boolean).\n3. Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema.\n\n## Example:\n### Input example:\n#### text:\nTech giant Company A announced yesterday that it will fully acquire startup Company B, which operates in the artificial intelligence sector, for a price of $2 billion. 
The acquisition is expected to be completed by the end of the year.\n\n#### graph schema example:\n{\"vertices\":[{\"vertex_label\":\"Company\",\"properties\":[\"name\",\"field\"]},{\"vertex_label\":\"Acquisition\",\"properties\":[\"price\",\"currency\",\"status\"]}], \"edges\":[{\"edge_label\":\"acquirer\",\"source_label\":\"Acquisition\",\"target_label\":\"Company\"},{\"edge_label\":\"acquired\",\"source_label\":\"Acquisition\",\"target_label\":\"Company\"}]}\n\n### Output example:\n{\"vertices\":[{\"id\":\"1:Company A\",\"label\":\"Company\",\"type\":\"vertex\",\"properties\":{\"name\":\"Company A\",\"field\":\"Tech\"}},{\"id\":\"1:Company B\",\"label\":\"Company\",\"type\":\"vertex\",\"properties\":{\"name\":\"Company B\",\"field\":\"artificial intelligence\"}},{\"id\":\"2:A acquires B\",\"label\":\"Acquisition\",\"type\":\"vertex\",\"properties\":{\"price\":2000000000,\"currency\":\"USD\",\"status\":\"expected to be completed by year-end\"}}],\"edges\":[{\"label\":\"acquirer\",\"type\":\"edge\",\"outV\":\"2:A acquires B\",\"outVLabel\":\"Acquisition\",\"inV\":\"1:Company A\",\"inVLabel\":\"Company\",\"properties\":{}},{\"label\":\"acquired\",\"type\":\"edge\",\"outV\":\"2:A acquires B\",\"outVLabel\":\"Acquisition\",\"inV\":\"1:Company B\",\"inVLabel\":\"Company\",\"properties\":{}}]}" + }, + { + "name": "Medical Diagnosis Extraction", + "description": "Extracts patients, symptoms, diagnosis results, and recommended drugs from medical record text.", + "text": "Patient Li Hua, presents with a headache and fever for three days. After examination, the diagnosis is a viral cold. 
It is recommended to take the drug 'Gankang' for treatment.", + "prompt": "## Main Task\nGiven the following graph schema and a piece of medical record, your task is to extract entities and relationships related to diagnosis and treatment.\n\n## Basic Rules:\n### Schema Format:\nGraph Schema:\n- \"vertices\": [List of vertex labels and their properties]\n- \"edges\": [List of edge labels, their source and target vertex labels, and properties]\n\n### Content Rule:\nPlease read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema.\nYou are not allowed to modify the schema contraints. Your task is to format the provided information into the required schema, without missing any keyword.\nFor each piece of information that matches a vertex or edge, format it strictly according to the following JSON structures:\n\n#### Vertex Format:\n{\"id\":\"vertexLabelID:entityName\",\"label\":\"vertexLabel\",\"type\":\"vertex\",\"properties\":{\"propertyName\":\"propertyValue\", ...}}\n\n#### Edge Format:\n{\"id\":\"vertexlabelID:pk1!pk2!pk3\", \"label\":\"edgeLabel\",\"type\":\"edge\",\"outV\":\"sourceVertexId\",\"outVLabel\":\"sourceVertexLabel\",\"inV\":\"targetVertexId\",\"inVLabel\":\"targetVertexLabel\",\"properties\":{\"propertyName\":\"propertyValue\",...}}\n\nStrictly follow these rules:\n1. Don't extract property fields or labels that doesn't exist in the given schema. Do not generate new information.\n2. Ensure the extracted property set in the same type as the given schema (like 'age' should be a number, 'select' should be a boolean).\n3. Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema.\n\n## Example:\n### Input example:\n#### text:\nPatient Li Hua, presents with a headache and fever for three days. After examination, the diagnosis is a viral cold. 
It is recommended to take the drug 'Gankang' for treatment.\n\n#### graph schema example:\n{\"vertices\":[{\"vertex_label\":\"Patient\",\"properties\":[\"name\"]},{\"vertex_label\":\"Symptom\",\"properties\":[\"name\"]},{\"vertex_label\":\"Diagnosis\",\"properties\":[\"name\"]},{\"vertex_label\":\"Drug\",\"properties\":[\"name\"]}], \"edges\":[{\"edge_label\":\"has_symptom\",\"source_label\":\"Patient\",\"target_label\":\"Symptom\"},{\"edge_label\":\"diagnosed_with\",\"source_label\":\"Patient\",\"target_label\":\"Diagnosis\"},{\"edge_label\":\"recommends_drug\",\"source_label\":\"Diagnosis\",\"target_label\":\"Drug\"}]}\n\n### Output example:\n{\"vertices\":[{\"id\":\"1:Li Hua\",\"label\":\"Patient\",\"type\":\"vertex\",\"properties\":{\"name\":\"Li Hua\"}},{\"id\":\"2:headache\",\"label\":\"Symptom\",\"type\":\"vertex\",\"properties\":{\"name\":\"headache\"}},{\"id\":\"2:fever\",\"label\":\"Symptom\",\"type\":\"vertex\",\"properties\":{\"name\":\"fever\"}},{\"id\":\"3:viral cold\",\"label\":\"Diagnosis\",\"type\":\"vertex\",\"properties\":{\"name\":\"viral cold\"}},{\"id\":\"4:Gankang\",\"label\":\"Drug\",\"type\":\"vertex\",\"properties\":{\"name\":\"Gankang\"}}],\"edges\":[{\"label\":\"has_symptom\",\"type\":\"edge\",\"outV\":\"1:Li Hua\",\"outVLabel\":\"Patient\",\"inV\":\"2:headache\",\"inVLabel\":\"Symptom\",\"properties\":{}},{\"label\":\"diagnosed_with\",\"type\":\"edge\",\"outV\":\"1:Li Hua\",\"outVLabel\":\"Patient\",\"inV\":\"3:viral cold\",\"inVLabel\":\"Diagnosis\",\"properties\":{}},{\"label\":\"recommends_drug\",\"type\":\"edge\",\"outV\":\"3:viral cold\",\"outVLabel\":\"Diagnosis\",\"inV\":\"4:Gankang\",\"inVLabel\":\"Drug\",\"properties\":{}}]}" + } +] From aef82af4e550ba7b9233e6d92db1f987dae8ef25 Mon Sep 17 00:00:00 2001 From: Lriver Date: Wed, 25 Jun 2025 14:24:25 +0800 Subject: [PATCH 03/12] Add PromptGenerate operator --- .../operators/llm_op/prompt_generate.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 
hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py new file mode 100644 index 000000000..91b49404a --- /dev/null +++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import json
import os
from typing import Any, Dict

from hugegraph_llm.config import resource_path, prompt as prompt_tpl
from hugegraph_llm.models.llms.base import BaseLLM
from hugegraph_llm.utils.log import log


class PromptGenerate:
    """Operator that asks an LLM to write a tailored graph-extraction prompt.

    The meta-prompt is ``prompt_tpl.generate_extract_prompt_template`` filled
    with one few-shot example (looked up by name in ``prompt_examples.json``)
    plus the user's own source text and scenario description.
    """

    def __init__(self, llm: BaseLLM):
        """Store the LLM client whose ``generate(prompt=...)`` method is used by ``run``."""
        self.llm = llm

    def _load_few_shot_example(self, example_name: str) -> Dict[str, Any]:
        """Return the few-shot example whose ``"name"`` equals *example_name*.

        Args:
            example_name: Value of the ``"name"`` key of the wanted example.

        Returns:
            The matching example object as loaded from the JSON file.

        Raises:
            FileNotFoundError: If ``prompt_examples.json`` does not exist.
            ValueError: If no example carries the requested name.
        """
        examples_path = os.path.join(resource_path, "prompt_examples", "prompt_examples.json")

        # Open directly instead of checking os.path.exists() first: a single
        # filesystem access and no window between the check and the open.
        try:
            with open(examples_path, "r", encoding="utf-8") as f:
                all_examples = json.load(f)
        except FileNotFoundError as err:
            raise FileNotFoundError(f"Examples file not found: {examples_path}") from err

        for example in all_examples:
            # Skip malformed (non-object) entries rather than crashing on .get().
            if isinstance(example, dict) and example.get("name") == example_name:
                return example

        raise ValueError(f"Example with name '{example_name}' not found in prompt_examples.json")

    def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """Generate a graph-extraction prompt and store it in *context*.

        Args:
            context: Must contain non-empty ``"source_text"``, ``"scenario"``
                and ``"example_name"`` entries.

        Returns:
            The same *context* dict, with the LLM output added under
            ``"generated_extract_prompt"``.

        Raises:
            ValueError: If a required entry is missing or empty, or the named
                example does not exist.
            FileNotFoundError: If the examples file is missing.
        """
        source_text = context.get("source_text")
        scenario = context.get("scenario")
        example_name = context.get("example_name")

        # Empty strings count as missing, exactly like absent keys.
        if not all([source_text, scenario, example_name]):
            raise ValueError("Missing required context: source_text, scenario, or example_name.")

        # NOTE(review): an earlier debugging remark here reported that a stale
        # (previously loaded) template was once picked up after a reload; if
        # that recurs, inspect prompt_tpl.generate_extract_prompt_template.
        few_shot_example = self._load_few_shot_example(example_name)

        meta_prompt = prompt_tpl.generate_extract_prompt_template.format(
            few_shot_text=few_shot_example.get("text", ""),
            few_shot_prompt=few_shot_example.get("prompt", ""),
            user_text=source_text,
            user_scenario=scenario,
        )

        # Lazy %-style arguments: the (potentially very long) prompt is only
        # interpolated when debug logging is actually enabled.
        log.debug("Meta-prompt sent to LLM: %s", meta_prompt)
        generated_prompt = self.llm.generate(prompt=meta_prompt)
        log.debug("Generated prompt from LLM: %s", generated_prompt)

        context["generated_extract_prompt"] = generated_prompt
        return context
9a3b39f0acb1f8b6a1dfd9f333cc37c6145daca8 Mon Sep 17 00:00:00 2001 From: Lriver Date: Wed, 25 Jun 2025 14:25:23 +0800 Subject: [PATCH 04/12] Integrate and enhance prompt generation UI --- .../demo/rag_demo/vector_graph_block.py | 322 ++++++++++++------ 1 file changed, 220 insertions(+), 102 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py index 51af045d7..7d1a76fa5 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py @@ -18,9 +18,10 @@ # pylint: disable=E1101 import asyncio - +import os +import yaml import gradio as gr - +import json from hugegraph_llm.config import huge_settings from hugegraph_llm.config import prompt from hugegraph_llm.utils.graph_index_utils import ( @@ -34,6 +35,9 @@ from hugegraph_llm.utils.hugegraph_utils import check_graph_db_connection from hugegraph_llm.utils.log import log from hugegraph_llm.utils.vector_index_utils import clean_vector_index, build_vector_index, get_vector_index_info +from hugegraph_llm.config import resource_path +from hugegraph_llm.operators.llm_op.prompt_generate import PromptGenerate +from hugegraph_llm.models.llms.init_llm import LLMs def store_prompt(doc, schema, example_prompt): @@ -43,114 +47,228 @@ def store_prompt(doc, schema, example_prompt): prompt.graph_schema = schema prompt.extract_graph_prompt = example_prompt prompt.update_yaml_file() + +def generate_prompt_for_ui(source_text, scenario, example_name): + """ + Handles the UI logic for generating a new prompt. It calls the PromptGenerate operator. 
+ """ + if not all([source_text, scenario, example_name]): + gr.Warning("Please provide original text, expected scenario, and select an example!") + return gr.update() + + try: + prompt_generator = PromptGenerate(llm=LLMs().get_chat_llm()) + context = { + "source_text": source_text, + "scenario": scenario, + "example_name": example_name + } + + result_context = prompt_generator.run(context) + + # Presents the result of generating prompt + generated_prompt = result_context.get("generated_extract_prompt", "Generation failed. Please check the logs.") + gr.Info("Prompt generated successfully!") + return generated_prompt + except Exception as e: + log.error("Error generating Prompt: %s", e, exc_info=True) + raise gr.Error(f"Error generating Prompt: {e}") + + + +def load_example_names(): + """Load all candidate examples""" + try: + examples_path = os.path.join(resource_path, "prompt_examples", "prompt_examples.json") + with open(examples_path, 'r', encoding='utf-8') as f: + examples = json.load(f) + return [example.get("name", "Unnamed example") for example in examples] + except (FileNotFoundError, json.JSONDecodeError): + return ["No available examples"] + +def update_example_preview(example_name): + """Update the display content based on the selected example name.""" + try: + examples_path = os.path.join(resource_path, "prompt_examples", "prompt_examples.json") + with open(examples_path, 'r', encoding='utf-8') as f: + all_examples = json.load(f) + + selected_example = next((ex for ex in all_examples if ex.get("name") == example_name), None) + + if selected_example: + # prompt_str = json.dumps(selected_example.get('prompt', {}), indent=2, ensure_ascii=False) + return ( + selected_example.get('description', ''), + selected_example.get('text', ''), + selected_example.get('prompt', ''), + ) + except Exception: + pass + + return "", "", "" def create_vector_graph_block(): # pylint: disable=no-member # pylint: disable=C0301 # pylint: disable=unexpected-keyword-arg - 
gr.Markdown( - """## Build Vector/Graph Index & Extract Knowledge Graph -- Docs: - - text: Build rag index from plain text - - file: Upload file(s) which should be TXT or .docx (Multiple files can be selected together) -- [Schema](https://hugegraph.apache.org/docs/clients/restful-api/schema/): (Accept **2 types**) - - User-defined Schema (JSON format, follow the [template](https://github.com/apache/incubator-hugegraph-ai/blob/aff3bbe25fa91c3414947a196131be812c20ef11/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L125) - to modify it) - - Specify the name of the HugeGraph graph instance, it will automatically get the schema from it (like - **"hugegraph"**) -- Graph Extract Prompt Header: The user-defined prompt of graph extracting -- If already exist the graph data, you should click "**Rebuild vid Index**" to update the index -""" - ) - - with gr.Row(): - with gr.Column(): - with gr.Tab("text") as tab_upload_text: - input_text = gr.Textbox( - value=prompt.doc_input_text, - label="Input Doc(s)", - lines=20, - show_copy_button=True - ) - with gr.Tab("file") as tab_upload_file: - input_file = gr.File( - value=None, - label="Docs (multi-files can be selected together)", - file_count="multiple", - ) - input_schema = gr.Code(value=prompt.graph_schema, label="Graph Schema", language="json", lines=15, max_lines=29) - info_extract_template = gr.Code( - value=prompt.extract_graph_prompt, label="Graph Extract Prompt Header", language="markdown", lines=15, - max_lines=29 + + with gr.Blocks() as demo: + + gr.Markdown( + """## Build Vector/Graph Index & Extract Knowledge Graph + - Docs: + - text: Build rag index from plain text + - file: Upload file(s) which should be TXT or .docx (Multiple files can be selected together) + - [Schema](https://hugegraph.apache.org/docs/clients/restful-api/schema/): (Accept **2 types**) + - User-defined Schema (JSON format, follow the 
[template](https://github.com/apache/incubator-hugegraph-ai/blob/aff3bbe25fa91c3414947a196131be812c20ef11/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L125) + to modify it) + - Specify the name of the HugeGraph graph instance, it will automatically get the schema from it (like + **"hugegraph"**) + - Graph Extract Prompt Header: The user-defined prompt of graph extracting + - If already exist the graph data, you should click "**Rebuild vid Index**" to update the index + """ ) - out = gr.Code(label="Output Info", language="json", elem_classes="code-container-edit") - with gr.Row(): - with gr.Accordion("Get RAG Info", open=False): - with gr.Column(): - vector_index_btn0 = gr.Button("Get Vector Index Info", size="sm") - graph_index_btn0 = gr.Button("Get Graph Index Info", size="sm") - with gr.Accordion("Clear RAG Data", open=False): + with gr.Row(): with gr.Column(): - vector_index_btn1 = gr.Button("Clear Chunks Vector Index", size="sm") - graph_index_btn1 = gr.Button("Clear Graph Vid Vector Index", size="sm") - graph_data_btn0 = gr.Button("Clear Graph Data", size="sm") - - vector_import_bt = gr.Button("Import into Vector", variant="primary") - graph_extract_bt = gr.Button("Extract Graph Data (1)", variant="primary") - graph_loading_bt = gr.Button("Load into GraphDB (2)", interactive=True) - graph_index_rebuild_bt = gr.Button("Update Vid Embedding") - - vector_index_btn0.click(get_vector_index_info, outputs=out).then( - store_prompt, - inputs=[input_text, input_schema, info_extract_template], - ) - vector_index_btn1.click(clean_vector_index).then( - store_prompt, - inputs=[input_text, input_schema, info_extract_template], - ) - vector_import_bt.click(build_vector_index, inputs=[input_file, input_text], outputs=out).then( - store_prompt, - inputs=[input_text, input_schema, info_extract_template], - ) - graph_index_btn0.click(get_graph_index_info, outputs=out).then( - store_prompt, - inputs=[input_text, input_schema, info_extract_template], - ) - 
graph_index_btn1.click(clean_all_graph_index).then( - store_prompt, - inputs=[input_text, input_schema, info_extract_template], - ) - graph_data_btn0.click(clean_all_graph_data).then( - store_prompt, - inputs=[input_text, input_schema, info_extract_template], - ) - graph_index_rebuild_bt.click(update_vid_embedding, outputs=out).then( - store_prompt, - inputs=[input_text, input_schema, info_extract_template], - ) - - # origin_out = gr.Textbox(visible=False) - graph_extract_bt.click( - extract_graph, inputs=[input_file, input_text, input_schema, info_extract_template], outputs=[out] - ).then(store_prompt, inputs=[input_text, input_schema, info_extract_template], ) - - graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]).then(update_vid_embedding).then( - store_prompt, - inputs=[input_text, input_schema, info_extract_template], - ) - - def on_tab_select(input_f, input_t, evt: gr.SelectData): - print(f"You selected {evt.value} at {evt.index} from {evt.target}") - if evt.value == "file": - return input_f, "" - if evt.value == "text": - return [], input_t - return [], "" - - tab_upload_file.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text]) - tab_upload_text.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text]) + with gr.Tab("text") as tab_upload_text: + input_text = gr.Textbox( + value=prompt.doc_input_text, + label="Input Doc(s)", + lines=20, + show_copy_button=True + ) + with gr.Tab("file") as tab_upload_file: + input_file = gr.File( + value=None, + label="Docs (multi-files can be selected together)", + file_count="multiple", + ) + input_schema = gr.Code(value=prompt.graph_schema, label="Graph Schema", language="json", lines=15, max_lines=29) + info_extract_template = gr.Code( + value=prompt.extract_graph_prompt, label="Graph Extract Prompt Header", language="markdown", lines=15, + max_lines=29 + ) + out = gr.Code(label="Output Info", language="json", 
elem_classes="code-container-edit") + + with gr.Row(): + with gr.Accordion("Get RAG Info", open=False): + with gr.Column(): + vector_index_btn0 = gr.Button("Get Vector Index Info", size="sm") + graph_index_btn0 = gr.Button("Get Graph Index Info", size="sm") + with gr.Accordion("Clear RAG Data", open=False): + with gr.Column(): + vector_index_btn1 = gr.Button("Clear Chunks Vector Index", size="sm") + graph_index_btn1 = gr.Button("Clear Graph Vid Vector Index", size="sm") + graph_data_btn0 = gr.Button("Clear Graph Data", size="sm") + + vector_import_bt = gr.Button("Import into Vector", variant="primary") + graph_extract_bt = gr.Button("Extract Graph Data (1)", variant="primary") + graph_loading_bt = gr.Button("Load into GraphDB (2)", interactive=True) + graph_index_rebuild_bt = gr.Button("Update Vid Embedding") + + gr.Markdown("---") + + with gr.Accordion("Assist in generating graph extraction prompts", open=True): + gr.Markdown("Provide your **original text** and **expected scenario**, then select a reference example to generate a high-quality graph extraction prompt.") + + user_scenario_text = gr.Textbox( + label="Expected scenario/direction", + info="For example: social relationships, financial knowledge graphs, etc.", + lines=2 + ) + + example_names = load_example_names() + few_shot_dropdown = gr.Dropdown( + choices=example_names, + label="Select a Few-shot example as a reference", + value=example_names[0] if example_names and example_names[0] != "No available examples" else None + ) + + with gr.Accordion("View example details", open=False): + example_desc_preview = gr.Markdown(label="Example description") + example_text_preview = gr.Textbox(label="Example input text", lines=5, interactive=False) + example_prompt_preview = gr.Code(label="Example Graph Extract Prompt", language="markdown", interactive=False) + + generate_prompt_btn = gr.Button("🚀 Auto-generate Graph Extract Prompt", variant="primary") + + + # Bind the change event of the dropdown menu + 
few_shot_dropdown.change( + fn=update_example_preview, + inputs=[few_shot_dropdown], + outputs=[example_desc_preview, example_text_preview, example_prompt_preview] + ) + + # Bind the click event of the generate button. + generate_prompt_btn.click( + fn=generate_prompt_for_ui, + inputs=[input_text, user_scenario_text, few_shot_dropdown], + outputs=[info_extract_template] + ) + + # Preload the page on the first load. + def warm_up_preview(example_name): + if not example_name: + return "", "", "" + return update_example_preview(example_name) + + demo.load( + fn=warm_up_preview, + inputs=[few_shot_dropdown], + outputs=[example_desc_preview, example_text_preview, example_prompt_preview] + ) + + vector_index_btn0.click(get_vector_index_info, outputs=out).then( + store_prompt, + inputs=[input_text, input_schema, info_extract_template], + ) + vector_index_btn1.click(clean_vector_index).then( + store_prompt, + inputs=[input_text, input_schema, info_extract_template], + ) + vector_import_bt.click(build_vector_index, inputs=[input_file, input_text], outputs=out).then( + store_prompt, + inputs=[input_text, input_schema, info_extract_template], + ) + graph_index_btn0.click(get_graph_index_info, outputs=out).then( + store_prompt, + inputs=[input_text, input_schema, info_extract_template], + ) + graph_index_btn1.click(clean_all_graph_index).then( + store_prompt, + inputs=[input_text, input_schema, info_extract_template], + ) + graph_data_btn0.click(clean_all_graph_data).then( + store_prompt, + inputs=[input_text, input_schema, info_extract_template], + ) + graph_index_rebuild_bt.click(update_vid_embedding, outputs=out).then( + store_prompt, + inputs=[input_text, input_schema, info_extract_template], + ) + + # origin_out = gr.Textbox(visible=False) + graph_extract_bt.click( + extract_graph, inputs=[input_file, input_text, input_schema, info_extract_template], outputs=[out] + ).then(store_prompt, inputs=[input_text, input_schema, info_extract_template], ) + + 
graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]).then(update_vid_embedding).then( + store_prompt, + inputs=[input_text, input_schema, info_extract_template], + ) + + def on_tab_select(input_f, input_t, evt: gr.SelectData): + print(f"You selected {evt.value} at {evt.index} from {evt.target}") + if evt.value == "file": + return input_f, "" + if evt.value == "text": + return [], input_t + return [], "" + + tab_upload_file.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text]) + tab_upload_text.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text]) return input_text, input_schema, info_extract_template @@ -184,4 +302,4 @@ async def timely_update_vid_embedding(interval_seconds: int = 3600): # pylint: disable=W0718 except Exception as e: log.warning("Failed to execute update_vid_embedding: %s", e, exc_info=True) - await asyncio.sleep(interval_seconds) + await asyncio.sleep(interval_seconds) \ No newline at end of file From ca63214d6df8ef36458fd9c1fb8853380c202877 Mon Sep 17 00:00:00 2001 From: Lriver Date: Tue, 1 Jul 2025 00:38:57 +0800 Subject: [PATCH 05/12] style fix, save Pylint check error --- .../config/models/base_prompt_config.py | 2 +- .../demo/rag_demo/vector_graph_block.py | 15 +++------------ .../operators/llm_op/prompt_generate.py | 6 +++--- 3 files changed, 7 insertions(+), 16 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py index 2330e135d..0d6b09bfc 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py +++ b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py @@ -78,7 +78,7 @@ def save_to_yaml(self): "\n".join([f" {line}" for line in self.keywords_extract_prompt.splitlines()]) ) indented_doc_input_text = "\n".join([f" {line}" for line in self.doc_input_text.splitlines()]) - 
indented_generate_extract_prompt = "\n".join([f" {line}" for line in self.generate_extract_prompt_template.splitlines()]) + indented_generate_extract_prompt = "\n".join([f" {line}" for line in self.generate_extract_prompt_template.splitlines()]) + "\n" # This can be extended to add storage fields according to the data needs to be stored diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py index 7d1a76fa5..5387ddbbc 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py @@ -19,7 +19,6 @@ import asyncio import os -import yaml import gradio as gr import json from hugegraph_llm.config import huge_settings @@ -39,7 +38,6 @@ from hugegraph_llm.operators.llm_op.prompt_generate import PromptGenerate from hugegraph_llm.models.llms.init_llm import LLMs - def store_prompt(doc, schema, example_prompt): # update env variables: doc, schema and example_prompt if prompt.doc_input_text != doc or prompt.graph_schema != schema or prompt.extract_graph_prompt != example_prompt: @@ -72,9 +70,7 @@ def generate_prompt_for_ui(source_text, scenario, example_name): return generated_prompt except Exception as e: log.error("Error generating Prompt: %s", e, exc_info=True) - raise gr.Error(f"Error generating Prompt: {e}") - - + raise gr.Error(f"Error generating Prompt: {e}") from e def load_example_names(): """Load all candidate examples""" @@ -192,34 +188,28 @@ def create_vector_graph_block(): example_prompt_preview = gr.Code(label="Example Graph Extract Prompt", language="markdown", interactive=False) generate_prompt_btn = gr.Button("🚀 Auto-generate Graph Extract Prompt", variant="primary") - - # Bind the change event of the dropdown menu few_shot_dropdown.change( fn=update_example_preview, inputs=[few_shot_dropdown], outputs=[example_desc_preview, example_text_preview, example_prompt_preview] ) - # 
Bind the click event of the generate button. generate_prompt_btn.click( fn=generate_prompt_for_ui, inputs=[input_text, user_scenario_text, few_shot_dropdown], outputs=[info_extract_template] ) - # Preload the page on the first load. def warm_up_preview(example_name): if not example_name: return "", "", "" return update_example_preview(example_name) - demo.load( fn=warm_up_preview, inputs=[few_shot_dropdown], outputs=[example_desc_preview, example_text_preview, example_prompt_preview] ) - vector_index_btn0.click(get_vector_index_info, outputs=out).then( store_prompt, inputs=[input_text, input_schema, info_extract_template], @@ -302,4 +292,5 @@ async def timely_update_vid_embedding(interval_seconds: int = 3600): # pylint: disable=W0718 except Exception as e: log.warning("Failed to execute update_vid_embedding: %s", e, exc_info=True) - await asyncio.sleep(interval_seconds) \ No newline at end of file + await asyncio.sleep(interval_seconds) + \ No newline at end of file diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py index 91b49404a..5fe8a8833 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py @@ -23,6 +23,7 @@ from hugegraph_llm.config import resource_path, prompt as prompt_tpl from hugegraph_llm.utils.log import log + class PromptGenerate: def __init__(self, llm: BaseLLM): self.llm = llm @@ -50,7 +51,6 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: if not all([source_text, scenario, example_name]): raise ValueError("Missing required context: source_text, scenario, or example_name.") - # print("[DEBUG] Template content:", prompt_tpl.generate_extract_prompt_template) # 出现过重载的错误,会一直加载旧版本的模板 few_shot_example = self._load_few_shot_example(example_name) meta_prompt = prompt_tpl.generate_extract_prompt_template.format( @@ -60,9 +60,9 @@ def run(self, 
context: Dict[str, Any]) -> Dict[str, Any]: user_scenario=scenario ) - log.debug(f"Meta-prompt sent to LLM: {meta_prompt}") + log.debug("Meta-prompt sent to LLM: %s", meta_prompt) generated_prompt = self.llm.generate(prompt=meta_prompt) - log.debug(f"Generated prompt from LLM: {generated_prompt}") + log.debug("Generated prompt from LLM: %s", generated_prompt) context["generated_extract_prompt"] = generated_prompt return context From 4c7df3d8cb3e9fde3b62476070ec91872f042b91 Mon Sep 17 00:00:00 2001 From: Lriver Date: Tue, 1 Jul 2025 01:01:13 +0800 Subject: [PATCH 06/12] fix Pylint error --- .../hugegraph_llm/config/models/base_prompt_config.py | 7 ++++--- .../hugegraph_llm/demo/rag_demo/vector_graph_block.py | 11 ----------- .../hugegraph_llm/operators/llm_op/prompt_generate.py | 4 ---- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py index 0d6b09bfc..a33737d2e 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py +++ b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py @@ -39,7 +39,7 @@ class BasePromptConfig: text2gql_graph_schema: str = '' gremlin_generate_prompt: str = '' doc_input_text: str = '' - + generate_extract_prompt_template: str = '' def ensure_yaml_file_exists(self): current_dir = Path.cwd().resolve() project_root = get_project_root() @@ -78,9 +78,10 @@ def save_to_yaml(self): "\n".join([f" {line}" for line in self.keywords_extract_prompt.splitlines()]) ) indented_doc_input_text = "\n".join([f" {line}" for line in self.doc_input_text.splitlines()]) - indented_generate_extract_prompt = "\n".join([f" {line}" for line in self.generate_extract_prompt_template.splitlines()]) + "\n" + indented_generate_extract_prompt = "\n".join( + [f" {line}" for line in self.generate_extract_prompt_template.splitlines()] + ) + "\n" - # This can be extended to add storage fields 
according to the data needs to be stored yaml_content = f"""graph_schema: | {indented_schema} diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py index 5387ddbbc..a29695669 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py @@ -45,7 +45,6 @@ def store_prompt(doc, schema, example_prompt): prompt.graph_schema = schema prompt.extract_graph_prompt = example_prompt prompt.update_yaml_file() - def generate_prompt_for_ui(source_text, scenario, example_name): """ Handles the UI logic for generating a new prompt. It calls the PromptGenerate operator. @@ -61,9 +60,7 @@ def generate_prompt_for_ui(source_text, scenario, example_name): "scenario": scenario, "example_name": example_name } - result_context = prompt_generator.run(context) - # Presents the result of generating prompt generated_prompt = result_context.get("generated_extract_prompt", "Generation failed. 
Please check the logs.") gr.Info("Prompt generated successfully!") @@ -88,7 +85,6 @@ def update_example_preview(example_name): examples_path = os.path.join(resource_path, "prompt_examples", "prompt_examples.json") with open(examples_path, 'r', encoding='utf-8') as f: all_examples = json.load(f) - selected_example = next((ex for ex in all_examples if ex.get("name") == example_name), None) if selected_example: @@ -100,7 +96,6 @@ def update_example_preview(example_name): ) except Exception: pass - return "", "", "" @@ -108,7 +103,6 @@ def create_vector_graph_block(): # pylint: disable=no-member # pylint: disable=C0301 # pylint: disable=unexpected-keyword-arg - with gr.Blocks() as demo: gr.Markdown( @@ -163,25 +157,20 @@ def create_vector_graph_block(): graph_extract_bt = gr.Button("Extract Graph Data (1)", variant="primary") graph_loading_bt = gr.Button("Load into GraphDB (2)", interactive=True) graph_index_rebuild_bt = gr.Button("Update Vid Embedding") - gr.Markdown("---") - with gr.Accordion("Assist in generating graph extraction prompts", open=True): gr.Markdown("Provide your **original text** and **expected scenario**, then select a reference example to generate a high-quality graph extraction prompt.") - user_scenario_text = gr.Textbox( label="Expected scenario/direction", info="For example: social relationships, financial knowledge graphs, etc.", lines=2 ) - example_names = load_example_names() few_shot_dropdown = gr.Dropdown( choices=example_names, label="Select a Few-shot example as a reference", value=example_names[0] if example_names and example_names[0] != "No available examples" else None ) - with gr.Accordion("View example details", open=False): example_desc_preview = gr.Markdown(label="Example description") example_text_preview = gr.Textbox(label="Example input text", lines=5, interactive=False) diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py index 
5fe8a8833..15367c49f 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py @@ -33,14 +33,11 @@ def _load_few_shot_example(self, example_name: str) -> Dict[str, Any]: examples_path = os.path.join(resource_path, "prompt_examples", "prompt_examples.json") if not os.path.exists(examples_path): raise FileNotFoundError(f"Examples file not found: {examples_path}") - with open(examples_path, "r", encoding="utf-8") as f: all_examples = json.load(f) - for example in all_examples: if example.get("name") == example_name: return example - raise ValueError(f"Example with name '{example_name}' not found in prompt_examples.json") def run(self, context: Dict[str, Any]) -> Dict[str, Any]: @@ -59,7 +56,6 @@ def run(self, context: Dict[str, Any]) -> Dict[str, Any]: user_text=source_text, user_scenario=scenario ) - log.debug("Meta-prompt sent to LLM: %s", meta_prompt) generated_prompt = self.llm.generate(prompt=meta_prompt) log.debug("Generated prompt from LLM: %s", generated_prompt) From 28a48d1fdf5a2a1ce217ba8a74a401714cbbf5b5 Mon Sep 17 00:00:00 2001 From: Lriver Date: Tue, 1 Jul 2025 01:40:29 +0800 Subject: [PATCH 07/12] Add the function so that the function is not too long, making it easier to read and maintain, fix code style --- .../config/models/base_prompt_config.py | 1 - .../demo/rag_demo/vector_graph_block.py | 91 ++++++++++--------- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py index a33737d2e..e00476352 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py +++ b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py @@ -81,7 +81,6 @@ def save_to_yaml(self): indented_generate_extract_prompt = "\n".join( [f" {line}" for line in self.generate_extract_prompt_template.splitlines()] ) + 
"\n" - # This can be extended to add storage fields according to the data needs to be stored yaml_content = f"""graph_schema: | {indented_schema} diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py index a29695669..b436fe159 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py @@ -19,8 +19,8 @@ import asyncio import os -import gradio as gr import json +import gradio as gr from hugegraph_llm.config import huge_settings from hugegraph_llm.config import prompt from hugegraph_llm.utils.graph_index_utils import ( @@ -88,17 +88,60 @@ def update_example_preview(example_name): selected_example = next((ex for ex in all_examples if ex.get("name") == example_name), None) if selected_example: - # prompt_str = json.dumps(selected_example.get('prompt', {}), indent=2, ensure_ascii=False) return ( selected_example.get('description', ''), selected_example.get('text', ''), selected_example.get('prompt', ''), ) - except Exception: + except (FileNotFoundError, json.JSONDecodeError) as e: + log.warning("Could not update example preview: %s", e) pass return "", "", "" +def _create_prompt_helper_block(demo, input_text, info_extract_template): + with gr.Accordion("Assist in generating graph extraction prompts", open=True): + gr.Markdown( + "Provide your **original text** and **expected scenario**, then select a reference example to generate a high-quality graph extraction prompt.") + user_scenario_text = gr.Textbox( + label="Expected scenario/direction", + info="For example: social relationships, financial knowledge graphs, etc.", + lines=2 + ) + example_names = load_example_names() + few_shot_dropdown = gr.Dropdown( + choices=example_names, + label="Select a Few-shot example as a reference", + value=example_names[0] if example_names and example_names[0] != "No available examples" else None 
+ ) + with gr.Accordion("View example details", open=False): + example_desc_preview = gr.Markdown(label="Example description") + example_text_preview = gr.Textbox(label="Example input text", lines=5, interactive=False) + example_prompt_preview = gr.Code(label="Example Graph Extract Prompt", language="markdown", + interactive=False) + generate_prompt_btn = gr.Button("🚀 Auto-generate Graph Extract Prompt", variant="primary") + # Bind the change event of the dropdown menu + few_shot_dropdown.change( + fn=update_example_preview, + inputs=[few_shot_dropdown], + outputs=[example_desc_preview, example_text_preview, example_prompt_preview] + ) + # Bind the click event of the generate button. + generate_prompt_btn.click( + fn=generate_prompt_for_ui, + inputs=[input_text, user_scenario_text, few_shot_dropdown], + outputs=[info_extract_template] + ) + # Preload the page on the first load. + def warm_up_preview(example_name): + if not example_name: + return "", "", "" + return update_example_preview(example_name) + demo.load( + fn=warm_up_preview, + inputs=[few_shot_dropdown], + outputs=[example_desc_preview, example_text_preview, example_prompt_preview] + ) def create_vector_graph_block(): # pylint: disable=no-member # pylint: disable=C0301 @@ -158,47 +201,7 @@ def create_vector_graph_block(): graph_loading_bt = gr.Button("Load into GraphDB (2)", interactive=True) graph_index_rebuild_bt = gr.Button("Update Vid Embedding") gr.Markdown("---") - with gr.Accordion("Assist in generating graph extraction prompts", open=True): - gr.Markdown("Provide your **original text** and **expected scenario**, then select a reference example to generate a high-quality graph extraction prompt.") - user_scenario_text = gr.Textbox( - label="Expected scenario/direction", - info="For example: social relationships, financial knowledge graphs, etc.", - lines=2 - ) - example_names = load_example_names() - few_shot_dropdown = gr.Dropdown( - choices=example_names, - label="Select a Few-shot example as a 
reference", - value=example_names[0] if example_names and example_names[0] != "No available examples" else None - ) - with gr.Accordion("View example details", open=False): - example_desc_preview = gr.Markdown(label="Example description") - example_text_preview = gr.Textbox(label="Example input text", lines=5, interactive=False) - example_prompt_preview = gr.Code(label="Example Graph Extract Prompt", language="markdown", interactive=False) - - generate_prompt_btn = gr.Button("🚀 Auto-generate Graph Extract Prompt", variant="primary") - # Bind the change event of the dropdown menu - few_shot_dropdown.change( - fn=update_example_preview, - inputs=[few_shot_dropdown], - outputs=[example_desc_preview, example_text_preview, example_prompt_preview] - ) - # Bind the click event of the generate button. - generate_prompt_btn.click( - fn=generate_prompt_for_ui, - inputs=[input_text, user_scenario_text, few_shot_dropdown], - outputs=[info_extract_template] - ) - # Preload the page on the first load. 
- def warm_up_preview(example_name): - if not example_name: - return "", "", "" - return update_example_preview(example_name) - demo.load( - fn=warm_up_preview, - inputs=[few_shot_dropdown], - outputs=[example_desc_preview, example_text_preview, example_prompt_preview] - ) + _create_prompt_helper_block(demo, input_text, info_extract_template) vector_index_btn0.click(get_vector_index_info, outputs=out).then( store_prompt, inputs=[input_text, input_schema, info_extract_template], From 1b281904322b9659c4a183ac509634c99858ecae Mon Sep 17 00:00:00 2001 From: Lriver Date: Tue, 1 Jul 2025 01:58:43 +0800 Subject: [PATCH 08/12] fix pylint warring --- .../src/hugegraph_llm/demo/rag_demo/vector_graph_block.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py index b436fe159..4d08114a2 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py @@ -45,14 +45,13 @@ def store_prompt(doc, schema, example_prompt): prompt.graph_schema = schema prompt.extract_graph_prompt = example_prompt prompt.update_yaml_file() -def generate_prompt_for_ui(source_text, scenario, example_name): +def generate_prompt_for_ui(source_text, scenario, example_name): """ Handles the UI logic for generating a new prompt. It calls the PromptGenerate operator. 
""" if not all([source_text, scenario, example_name]): gr.Warning("Please provide original text, expected scenario, and select an example!") return gr.update() - try: prompt_generator = PromptGenerate(llm=LLMs().get_chat_llm()) context = { @@ -95,12 +94,13 @@ def update_example_preview(example_name): ) except (FileNotFoundError, json.JSONDecodeError) as e: log.warning("Could not update example preview: %s", e) - pass return "", "", "" def _create_prompt_helper_block(demo, input_text, info_extract_template): with gr.Accordion("Assist in generating graph extraction prompts", open=True): gr.Markdown( - "Provide your **original text** and **expected scenario**, then select a reference example to generate a high-quality graph extraction prompt.") + "Provide your **original text** and **expected scenario**, " + "then select a reference example to generate a high-quality graph extraction prompt." + ) user_scenario_text = gr.Textbox( label="Expected scenario/direction", info="For example: social relationships, financial knowledge graphs, etc.", From ef0b5bfbb08d05d83d0e3dd4ef510bffac9e4ca2 Mon Sep 17 00:00:00 2001 From: Lriver Date: Tue, 1 Jul 2025 02:07:30 +0800 Subject: [PATCH 09/12] Compliant with PEP 8 standard --- .../src/hugegraph_llm/demo/rag_demo/vector_graph_block.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py index 4d08114a2..5a7b5694e 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py @@ -45,6 +45,8 @@ def store_prompt(doc, schema, example_prompt): prompt.graph_schema = schema prompt.extract_graph_prompt = example_prompt prompt.update_yaml_file() + + def generate_prompt_for_ui(source_text, scenario, example_name): """ Handles the UI logic for generating a new prompt. It calls the PromptGenerate operator. 
@@ -95,6 +97,8 @@ def update_example_preview(example_name): except (FileNotFoundError, json.JSONDecodeError) as e: log.warning("Could not update example preview: %s", e) return "", "", "" + + def _create_prompt_helper_block(demo, input_text, info_extract_template): with gr.Accordion("Assist in generating graph extraction prompts", open=True): gr.Markdown( @@ -131,6 +135,7 @@ def _create_prompt_helper_block(demo, input_text, info_extract_template): inputs=[input_text, user_scenario_text, few_shot_dropdown], outputs=[info_extract_template] ) + # Preload the page on the first load. def warm_up_preview(example_name): if not example_name: @@ -142,6 +147,8 @@ def warm_up_preview(example_name): inputs=[few_shot_dropdown], outputs=[example_desc_preview, example_text_preview, example_prompt_preview] ) + + def create_vector_graph_block(): # pylint: disable=no-member # pylint: disable=C0301 @@ -254,6 +261,7 @@ def on_tab_select(input_f, input_t, evt: gr.SelectData): return input_text, input_schema, info_extract_template + async def timely_update_vid_embedding(interval_seconds: int = 3600): """ Periodically updates vertex embeddings in the graph database. 
From b9398a86395a8306e9b00658741748561a6ba418 Mon Sep 17 00:00:00 2001 From: Lriver Date: Tue, 1 Jul 2025 02:19:46 +0800 Subject: [PATCH 10/12] Remove excess white space --- .../src/hugegraph_llm/demo/rag_demo/vector_graph_block.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py index 5a7b5694e..ea3d23269 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py @@ -135,7 +135,7 @@ def _create_prompt_helper_block(demo, input_text, info_extract_template): inputs=[input_text, user_scenario_text, few_shot_dropdown], outputs=[info_extract_template] ) - + # Preload the page on the first load. def warm_up_preview(example_name): if not example_name: From 35ee19dbc72956358158176284eecf1df6083113 Mon Sep 17 00:00:00 2001 From: imbajin Date: Tue, 1 Jul 2025 19:16:29 +0800 Subject: [PATCH 11/12] Refactor imports and minor cleanup Reordered and deduplicated imports for consistency in base_prompt_config.py, vector_graph_block.py, and prompt_generate.py. Made minor code style improvements and fixed typos in comments to enhance readability and maintainability. --- .../config/models/base_prompt_config.py | 5 ++-- .../demo/rag_demo/vector_graph_block.py | 26 ++++++++++++------- .../operators/llm_op/prompt_generate.py | 5 ++-- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py index e00476352..691247b3c 100644 --- a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py +++ b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py @@ -15,14 +15,14 @@ # specific language governing permissions and limitations # under the License. 
-import sys import os +import sys from pathlib import Path import yaml -from hugegraph_llm.utils.log import log from hugegraph_llm.utils.anchor import get_project_root +from hugegraph_llm.utils.log import log dir_name = os.path.dirname F_NAME = "config_prompt.yaml" @@ -40,6 +40,7 @@ class BasePromptConfig: gremlin_generate_prompt: str = '' doc_input_text: str = '' generate_extract_prompt_template: str = '' + def ensure_yaml_file_exists(self): current_dir = Path.cwd().resolve() project_root = get_project_root() diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py index ea3d23269..f80710aba 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py @@ -18,11 +18,16 @@ # pylint: disable=E1101 import asyncio -import os import json +import os + import gradio as gr + from hugegraph_llm.config import huge_settings from hugegraph_llm.config import prompt +from hugegraph_llm.config import resource_path +from hugegraph_llm.models.llms.init_llm import LLMs +from hugegraph_llm.operators.llm_op.prompt_generate import PromptGenerate from hugegraph_llm.utils.graph_index_utils import ( get_graph_index_info, clean_all_graph_index, @@ -34,9 +39,7 @@ from hugegraph_llm.utils.hugegraph_utils import check_graph_db_connection from hugegraph_llm.utils.log import log from hugegraph_llm.utils.vector_index_utils import clean_vector_index, build_vector_index, get_vector_index_info -from hugegraph_llm.config import resource_path -from hugegraph_llm.operators.llm_op.prompt_generate import PromptGenerate -from hugegraph_llm.models.llms.init_llm import LLMs + def store_prompt(doc, schema, example_prompt): # update env variables: doc, schema and example_prompt @@ -59,10 +62,10 @@ def generate_prompt_for_ui(source_text, scenario, example_name): context = { "source_text": source_text, "scenario": scenario, - 
"example_name": example_name + "example_name": example_name } result_context = prompt_generator.run(context) - # Presents the result of generating prompt + # Presents the result of generating prompt generated_prompt = result_context.get("generated_extract_prompt", "Generation failed. Please check the logs.") gr.Info("Prompt generated successfully!") return generated_prompt @@ -70,6 +73,7 @@ def generate_prompt_for_ui(source_text, scenario, example_name): log.error("Error generating Prompt: %s", e, exc_info=True) raise gr.Error(f"Error generating Prompt: {e}") from e + def load_example_names(): """Load all candidate examples""" try: @@ -80,6 +84,7 @@ def load_example_names(): except (FileNotFoundError, json.JSONDecodeError): return ["No available examples"] + def update_example_preview(example_name): """Update the display content based on the selected example name.""" try: @@ -129,7 +134,7 @@ def _create_prompt_helper_block(demo, input_text, info_extract_template): inputs=[few_shot_dropdown], outputs=[example_desc_preview, example_text_preview, example_prompt_preview] ) - # Bind the click event of the generate button. + # Bind the click event of the generated button. 
generate_prompt_btn.click( fn=generate_prompt_for_ui, inputs=[input_text, user_scenario_text, few_shot_dropdown], @@ -185,7 +190,8 @@ def create_vector_graph_block(): label="Docs (multi-files can be selected together)", file_count="multiple", ) - input_schema = gr.Code(value=prompt.graph_schema, label="Graph Schema", language="json", lines=15, max_lines=29) + input_schema = gr.Code(value=prompt.graph_schema, label="Graph Schema", language="json", lines=15, + max_lines=29) info_extract_template = gr.Code( value=prompt.extract_graph_prompt, label="Graph Extract Prompt Header", language="markdown", lines=15, max_lines=29 @@ -243,7 +249,8 @@ def create_vector_graph_block(): extract_graph, inputs=[input_file, input_text, input_schema, info_extract_template], outputs=[out] ).then(store_prompt, inputs=[input_text, input_schema, info_extract_template], ) - graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]).then(update_vid_embedding).then( + graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]).then( + update_vid_embedding).then( store_prompt, inputs=[input_text, input_schema, info_extract_template], ) @@ -293,4 +300,3 @@ async def timely_update_vid_embedding(interval_seconds: int = 3600): except Exception as e: log.warning("Failed to execute update_vid_embedding: %s", e, exc_info=True) await asyncio.sleep(interval_seconds) - \ No newline at end of file diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py index 15367c49f..a7ea1e3dd 100644 --- a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py +++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py @@ -16,11 +16,12 @@ # under the License. 
-import os import json +import os from typing import Dict, Any -from hugegraph_llm.models.llms.base import BaseLLM + from hugegraph_llm.config import resource_path, prompt as prompt_tpl +from hugegraph_llm.models.llms.base import BaseLLM from hugegraph_llm.utils.log import log From d510683764cb32c845fd8d0254d1ea5747d65b12 Mon Sep 17 00:00:00 2001 From: Lriver Date: Tue, 1 Jul 2025 20:53:43 +0800 Subject: [PATCH 12/12] Fix UI interface Doc format index issue --- .../demo/rag_demo/vector_graph_block.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py index f80710aba..c3533036d 100644 --- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py +++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py @@ -163,13 +163,13 @@ def create_vector_graph_block(): gr.Markdown( """## Build Vector/Graph Index & Extract Knowledge Graph - Docs: - - text: Build rag index from plain text - - file: Upload file(s) which should be TXT or .docx (Multiple files can be selected together) + - text: Build rag index from plain text + - file: Upload file(s) which should be TXT or .docx (Multiple files can be selected together) - [Schema](https://hugegraph.apache.org/docs/clients/restful-api/schema/): (Accept **2 types**) - - User-defined Schema (JSON format, follow the [template](https://github.com/apache/incubator-hugegraph-ai/blob/aff3bbe25fa91c3414947a196131be812c20ef11/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L125) - to modify it) - - Specify the name of the HugeGraph graph instance, it will automatically get the schema from it (like - **"hugegraph"**) + - User-defined Schema (JSON format, follow the [template](https://github.com/apache/incubator-hugegraph-ai/blob/aff3bbe25fa91c3414947a196131be812c20ef11/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L125) + to modify it) + - 
Specify the name of the HugeGraph graph instance, it will automatically get the schema from it (like + **"hugegraph"**) - Graph Extract Prompt Header: The user-defined prompt of graph extracting - If already exist the graph data, you should click "**Rebuild vid Index**" to update the index """