diff --git a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py
index 563845d33..691247b3c 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/models/base_prompt_config.py
@@ -15,14 +15,14 @@
# specific language governing permissions and limitations
# under the License.
-import sys
import os
+import sys
from pathlib import Path
import yaml
-from hugegraph_llm.utils.log import log
from hugegraph_llm.utils.anchor import get_project_root
+from hugegraph_llm.utils.log import log
dir_name = os.path.dirname
F_NAME = "config_prompt.yaml"
@@ -39,6 +39,7 @@ class BasePromptConfig:
text2gql_graph_schema: str = ''
gremlin_generate_prompt: str = ''
doc_input_text: str = ''
+ generate_extract_prompt_template: str = ''
def ensure_yaml_file_exists(self):
current_dir = Path.cwd().resolve()
@@ -78,7 +79,9 @@ def save_to_yaml(self):
"\n".join([f" {line}" for line in self.keywords_extract_prompt.splitlines()])
)
indented_doc_input_text = "\n".join([f" {line}" for line in self.doc_input_text.splitlines()])
-
+ indented_generate_extract_prompt = "\n".join(
+ [f" {line}" for line in self.generate_extract_prompt_template.splitlines()]
+ ) + "\n"
# This can be extended to add storage fields according to the data needs to be stored
yaml_content = f"""graph_schema: |
{indented_schema}
@@ -107,6 +110,8 @@ def save_to_yaml(self):
doc_input_text: |
{indented_doc_input_text}
+generate_extract_prompt_template: |
+{indented_generate_extract_prompt}
"""
with open(yaml_file_path, "w", encoding="utf-8") as file:
file.write(yaml_content)
diff --git a/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py b/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py
index 01b92b7ef..547cda915 100644
--- a/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py
+++ b/hugegraph-llm/src/hugegraph_llm/config/prompt_config.py
@@ -386,4 +386,42 @@ class PromptConfig(BasePromptConfig):
doc_input_text_CN: str = """介绍一下Sarah,她是一位30岁的律师,还有她的室友James,他们从2010年开始一起合租。James是一名记者,
职业道路也很出色。另外,Sarah拥有一个个人网站www.sarahsplace.com,而James也经营着自己的网页,不过这里没有提到具体的网址。这两个人,
Sarah和James,不仅建立起了深厚的室友情谊,还各自在网络上开辟了自己的一片天地,展示着他们各自丰富多彩的兴趣和经历。
+"""
+
+ generate_extract_prompt_template: str = """## Your Role
+You are an expert in crafting high-quality prompts for Large Language Models (LLMs), specializing in extracting graph structures from text.
+
+## Core Task
+Your goal is to generate a new, tailored "Graph Extract Prompt Header" based on user requirements and a provided example. This new prompt will be used to guide another LLM.
+
+## Input Information
+1. **User's Source Text**: A sample of the text for extraction.
+2. **User's Desired Scenario/Direction**: A description of the user's goal.
+3. **A High-Quality Few-shot Example**: A complete, working example including a sample text and the corresponding full "Graph Extract Prompt".
+
+## Generation Rules
+1. **Analyze**: Carefully analyze the user's source text and desired scenario.
+2. **Adapt**: From the provided Few-shot Example's "Graph Extract Prompt", you must learn its structure, rules, and especially the format of the `graph schema example` and `Output example` sections.
+3. **Create New Content**:
+ - **Infer a New Schema**: Based on the user's scenario and text, create a new `graph schema example` block.
+ - **Synthesize a New Output**: Based on the user's text and your new schema, create a new `Output example` block.
+4. **Construct the Final Prompt**: Combine the general instructions from the Few-shot Example with your newly created `graph schema example` and `Output example` to form a complete, new "Graph Extract Prompt Header".
+
+---
+## Provided Few-shot Example (For Your Reference)
+### Example Text:
+{few_shot_text}
+
+### Corresponding "Graph Extract Prompt":
+{few_shot_prompt}
+---
+
+## User's Request (Generate a new prompt based on this)
+### User's Source Text:
+{user_text}
+
+### User's Desired Scenario/Direction:
+{user_scenario}
+
+## Your Generated "Graph Extract Prompt Header":
"""
diff --git a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
index 51af045d7..c3533036d 100644
--- a/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
+++ b/hugegraph-llm/src/hugegraph_llm/demo/rag_demo/vector_graph_block.py
@@ -18,11 +18,16 @@
# pylint: disable=E1101
import asyncio
+import json
+import os
import gradio as gr
from hugegraph_llm.config import huge_settings
from hugegraph_llm.config import prompt
+from hugegraph_llm.config import resource_path
+from hugegraph_llm.models.llms.init_llm import LLMs
+from hugegraph_llm.operators.llm_op.prompt_generate import PromptGenerate
from hugegraph_llm.utils.graph_index_utils import (
get_graph_index_info,
clean_all_graph_index,
@@ -45,115 +50,225 @@ def store_prompt(doc, schema, example_prompt):
prompt.update_yaml_file()
+def generate_prompt_for_ui(source_text, scenario, example_name):
+ """
+ Handles the UI logic for generating a new prompt. It calls the PromptGenerate operator.
+ """
+ if not all([source_text, scenario, example_name]):
+ gr.Warning("Please provide original text, expected scenario, and select an example!")
+ return gr.update()
+ try:
+ prompt_generator = PromptGenerate(llm=LLMs().get_chat_llm())
+ context = {
+ "source_text": source_text,
+ "scenario": scenario,
+ "example_name": example_name
+ }
+ result_context = prompt_generator.run(context)
+        # Present the generated prompt, falling back to an error hint if the key is missing
+ generated_prompt = result_context.get("generated_extract_prompt", "Generation failed. Please check the logs.")
+ gr.Info("Prompt generated successfully!")
+ return generated_prompt
+ except Exception as e:
+ log.error("Error generating Prompt: %s", e, exc_info=True)
+ raise gr.Error(f"Error generating Prompt: {e}") from e
+
+
+def load_example_names():
+ """Load all candidate examples"""
+ try:
+ examples_path = os.path.join(resource_path, "prompt_examples", "prompt_examples.json")
+ with open(examples_path, 'r', encoding='utf-8') as f:
+ examples = json.load(f)
+ return [example.get("name", "Unnamed example") for example in examples]
+ except (FileNotFoundError, json.JSONDecodeError):
+ return ["No available examples"]
+
+
+def update_example_preview(example_name):
+ """Update the display content based on the selected example name."""
+ try:
+ examples_path = os.path.join(resource_path, "prompt_examples", "prompt_examples.json")
+ with open(examples_path, 'r', encoding='utf-8') as f:
+ all_examples = json.load(f)
+ selected_example = next((ex for ex in all_examples if ex.get("name") == example_name), None)
+
+ if selected_example:
+ return (
+ selected_example.get('description', ''),
+ selected_example.get('text', ''),
+ selected_example.get('prompt', ''),
+ )
+ except (FileNotFoundError, json.JSONDecodeError) as e:
+ log.warning("Could not update example preview: %s", e)
+ return "", "", ""
+
+
+def _create_prompt_helper_block(demo, input_text, info_extract_template):
+ with gr.Accordion("Assist in generating graph extraction prompts", open=True):
+ gr.Markdown(
+ "Provide your **original text** and **expected scenario**, "
+ "then select a reference example to generate a high-quality graph extraction prompt."
+ )
+ user_scenario_text = gr.Textbox(
+ label="Expected scenario/direction",
+ info="For example: social relationships, financial knowledge graphs, etc.",
+ lines=2
+ )
+ example_names = load_example_names()
+ few_shot_dropdown = gr.Dropdown(
+ choices=example_names,
+ label="Select a Few-shot example as a reference",
+ value=example_names[0] if example_names and example_names[0] != "No available examples" else None
+ )
+ with gr.Accordion("View example details", open=False):
+ example_desc_preview = gr.Markdown(label="Example description")
+ example_text_preview = gr.Textbox(label="Example input text", lines=5, interactive=False)
+ example_prompt_preview = gr.Code(label="Example Graph Extract Prompt", language="markdown",
+ interactive=False)
+
+ generate_prompt_btn = gr.Button("🚀 Auto-generate Graph Extract Prompt", variant="primary")
+ # Bind the change event of the dropdown menu
+ few_shot_dropdown.change(
+ fn=update_example_preview,
+ inputs=[few_shot_dropdown],
+ outputs=[example_desc_preview, example_text_preview, example_prompt_preview]
+ )
+        # Bind the click event of the generate button.
+ generate_prompt_btn.click(
+ fn=generate_prompt_for_ui,
+ inputs=[input_text, user_scenario_text, few_shot_dropdown],
+ outputs=[info_extract_template]
+ )
+
+    # Pre-populate the example preview when the page first loads.
+ def warm_up_preview(example_name):
+ if not example_name:
+ return "", "", ""
+ return update_example_preview(example_name)
+
+ demo.load(
+ fn=warm_up_preview,
+ inputs=[few_shot_dropdown],
+ outputs=[example_desc_preview, example_text_preview, example_prompt_preview]
+ )
+
+
def create_vector_graph_block():
# pylint: disable=no-member
# pylint: disable=C0301
# pylint: disable=unexpected-keyword-arg
- gr.Markdown(
- """## Build Vector/Graph Index & Extract Knowledge Graph
-- Docs:
- - text: Build rag index from plain text
- - file: Upload file(s) which should be TXT or .docx (Multiple files can be selected together)
-- [Schema](https://hugegraph.apache.org/docs/clients/restful-api/schema/): (Accept **2 types**)
- - User-defined Schema (JSON format, follow the [template](https://github.com/apache/incubator-hugegraph-ai/blob/aff3bbe25fa91c3414947a196131be812c20ef11/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L125)
- to modify it)
- - Specify the name of the HugeGraph graph instance, it will automatically get the schema from it (like
- **"hugegraph"**)
-- Graph Extract Prompt Header: The user-defined prompt of graph extracting
-- If already exist the graph data, you should click "**Rebuild vid Index**" to update the index
-"""
- )
-
- with gr.Row():
- with gr.Column():
- with gr.Tab("text") as tab_upload_text:
- input_text = gr.Textbox(
- value=prompt.doc_input_text,
- label="Input Doc(s)",
- lines=20,
- show_copy_button=True
- )
- with gr.Tab("file") as tab_upload_file:
- input_file = gr.File(
- value=None,
- label="Docs (multi-files can be selected together)",
- file_count="multiple",
- )
- input_schema = gr.Code(value=prompt.graph_schema, label="Graph Schema", language="json", lines=15, max_lines=29)
- info_extract_template = gr.Code(
- value=prompt.extract_graph_prompt, label="Graph Extract Prompt Header", language="markdown", lines=15,
- max_lines=29
+ with gr.Blocks() as demo:
+
+ gr.Markdown(
+ """## Build Vector/Graph Index & Extract Knowledge Graph
+ - Docs:
+ - text: Build rag index from plain text
+ - file: Upload file(s) which should be TXT or .docx (Multiple files can be selected together)
+ - [Schema](https://hugegraph.apache.org/docs/clients/restful-api/schema/): (Accept **2 types**)
+ - User-defined Schema (JSON format, follow the [template](https://github.com/apache/incubator-hugegraph-ai/blob/aff3bbe25fa91c3414947a196131be812c20ef11/hugegraph-llm/src/hugegraph_llm/config/config_data.py#L125)
+ to modify it)
+ - Specify the name of the HugeGraph graph instance, it will automatically get the schema from it (like
+ **"hugegraph"**)
+ - Graph Extract Prompt Header: The user-defined prompt of graph extracting
+        - If the graph data already exists, you should click "**Rebuild vid Index**" to update the index
+ """
)
- out = gr.Code(label="Output Info", language="json", elem_classes="code-container-edit")
- with gr.Row():
- with gr.Accordion("Get RAG Info", open=False):
+ with gr.Row():
with gr.Column():
- vector_index_btn0 = gr.Button("Get Vector Index Info", size="sm")
- graph_index_btn0 = gr.Button("Get Graph Index Info", size="sm")
- with gr.Accordion("Clear RAG Data", open=False):
- with gr.Column():
- vector_index_btn1 = gr.Button("Clear Chunks Vector Index", size="sm")
- graph_index_btn1 = gr.Button("Clear Graph Vid Vector Index", size="sm")
- graph_data_btn0 = gr.Button("Clear Graph Data", size="sm")
-
- vector_import_bt = gr.Button("Import into Vector", variant="primary")
- graph_extract_bt = gr.Button("Extract Graph Data (1)", variant="primary")
- graph_loading_bt = gr.Button("Load into GraphDB (2)", interactive=True)
- graph_index_rebuild_bt = gr.Button("Update Vid Embedding")
-
- vector_index_btn0.click(get_vector_index_info, outputs=out).then(
- store_prompt,
- inputs=[input_text, input_schema, info_extract_template],
- )
- vector_index_btn1.click(clean_vector_index).then(
- store_prompt,
- inputs=[input_text, input_schema, info_extract_template],
- )
- vector_import_bt.click(build_vector_index, inputs=[input_file, input_text], outputs=out).then(
- store_prompt,
- inputs=[input_text, input_schema, info_extract_template],
- )
- graph_index_btn0.click(get_graph_index_info, outputs=out).then(
- store_prompt,
- inputs=[input_text, input_schema, info_extract_template],
- )
- graph_index_btn1.click(clean_all_graph_index).then(
- store_prompt,
- inputs=[input_text, input_schema, info_extract_template],
- )
- graph_data_btn0.click(clean_all_graph_data).then(
- store_prompt,
- inputs=[input_text, input_schema, info_extract_template],
- )
- graph_index_rebuild_bt.click(update_vid_embedding, outputs=out).then(
- store_prompt,
- inputs=[input_text, input_schema, info_extract_template],
- )
-
- # origin_out = gr.Textbox(visible=False)
- graph_extract_bt.click(
- extract_graph, inputs=[input_file, input_text, input_schema, info_extract_template], outputs=[out]
- ).then(store_prompt, inputs=[input_text, input_schema, info_extract_template], )
-
- graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]).then(update_vid_embedding).then(
- store_prompt,
- inputs=[input_text, input_schema, info_extract_template],
- )
-
- def on_tab_select(input_f, input_t, evt: gr.SelectData):
- print(f"You selected {evt.value} at {evt.index} from {evt.target}")
- if evt.value == "file":
- return input_f, ""
- if evt.value == "text":
- return [], input_t
- return [], ""
-
- tab_upload_file.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text])
- tab_upload_text.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text])
+ with gr.Tab("text") as tab_upload_text:
+ input_text = gr.Textbox(
+ value=prompt.doc_input_text,
+ label="Input Doc(s)",
+ lines=20,
+ show_copy_button=True
+ )
+ with gr.Tab("file") as tab_upload_file:
+ input_file = gr.File(
+ value=None,
+ label="Docs (multi-files can be selected together)",
+ file_count="multiple",
+ )
+ input_schema = gr.Code(value=prompt.graph_schema, label="Graph Schema", language="json", lines=15,
+ max_lines=29)
+ info_extract_template = gr.Code(
+ value=prompt.extract_graph_prompt, label="Graph Extract Prompt Header", language="markdown", lines=15,
+ max_lines=29
+ )
+ out = gr.Code(label="Output Info", language="json", elem_classes="code-container-edit")
+
+ with gr.Row():
+ with gr.Accordion("Get RAG Info", open=False):
+ with gr.Column():
+ vector_index_btn0 = gr.Button("Get Vector Index Info", size="sm")
+ graph_index_btn0 = gr.Button("Get Graph Index Info", size="sm")
+ with gr.Accordion("Clear RAG Data", open=False):
+ with gr.Column():
+ vector_index_btn1 = gr.Button("Clear Chunks Vector Index", size="sm")
+ graph_index_btn1 = gr.Button("Clear Graph Vid Vector Index", size="sm")
+ graph_data_btn0 = gr.Button("Clear Graph Data", size="sm")
+
+ vector_import_bt = gr.Button("Import into Vector", variant="primary")
+ graph_extract_bt = gr.Button("Extract Graph Data (1)", variant="primary")
+ graph_loading_bt = gr.Button("Load into GraphDB (2)", interactive=True)
+ graph_index_rebuild_bt = gr.Button("Update Vid Embedding")
+ gr.Markdown("---")
+ _create_prompt_helper_block(demo, input_text, info_extract_template)
+ vector_index_btn0.click(get_vector_index_info, outputs=out).then(
+ store_prompt,
+ inputs=[input_text, input_schema, info_extract_template],
+ )
+ vector_index_btn1.click(clean_vector_index).then(
+ store_prompt,
+ inputs=[input_text, input_schema, info_extract_template],
+ )
+ vector_import_bt.click(build_vector_index, inputs=[input_file, input_text], outputs=out).then(
+ store_prompt,
+ inputs=[input_text, input_schema, info_extract_template],
+ )
+ graph_index_btn0.click(get_graph_index_info, outputs=out).then(
+ store_prompt,
+ inputs=[input_text, input_schema, info_extract_template],
+ )
+ graph_index_btn1.click(clean_all_graph_index).then(
+ store_prompt,
+ inputs=[input_text, input_schema, info_extract_template],
+ )
+ graph_data_btn0.click(clean_all_graph_data).then(
+ store_prompt,
+ inputs=[input_text, input_schema, info_extract_template],
+ )
+ graph_index_rebuild_bt.click(update_vid_embedding, outputs=out).then(
+ store_prompt,
+ inputs=[input_text, input_schema, info_extract_template],
+ )
+
+ # origin_out = gr.Textbox(visible=False)
+ graph_extract_bt.click(
+ extract_graph, inputs=[input_file, input_text, input_schema, info_extract_template], outputs=[out]
+ ).then(store_prompt, inputs=[input_text, input_schema, info_extract_template], )
+
+ graph_loading_bt.click(import_graph_data, inputs=[out, input_schema], outputs=[out]).then(
+ update_vid_embedding).then(
+ store_prompt,
+ inputs=[input_text, input_schema, info_extract_template],
+ )
+
+ def on_tab_select(input_f, input_t, evt: gr.SelectData):
+ print(f"You selected {evt.value} at {evt.index} from {evt.target}")
+ if evt.value == "file":
+ return input_f, ""
+ if evt.value == "text":
+ return [], input_t
+ return [], ""
+
+ tab_upload_file.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text])
+ tab_upload_text.select(fn=on_tab_select, inputs=[input_file, input_text], outputs=[input_file, input_text])
return input_text, input_schema, info_extract_template
+
async def timely_update_vid_embedding(interval_seconds: int = 3600):
"""
Periodically updates vertex embeddings in the graph database.
diff --git a/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py
new file mode 100644
index 000000000..a7ea1e3dd
--- /dev/null
+++ b/hugegraph-llm/src/hugegraph_llm/operators/llm_op/prompt_generate.py
@@ -0,0 +1,65 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+
+import json
+import os
+from typing import Dict, Any
+
+from hugegraph_llm.config import resource_path, prompt as prompt_tpl
+from hugegraph_llm.models.llms.base import BaseLLM
+from hugegraph_llm.utils.log import log
+
+
+class PromptGenerate:
+ def __init__(self, llm: BaseLLM):
+ self.llm = llm
+
+ def _load_few_shot_example(self, example_name: str) -> Dict[str, Any]:
+ """Loads and finds the specified few-shot example from the unified JSON file."""
+ examples_path = os.path.join(resource_path, "prompt_examples", "prompt_examples.json")
+ if not os.path.exists(examples_path):
+ raise FileNotFoundError(f"Examples file not found: {examples_path}")
+ with open(examples_path, "r", encoding="utf-8") as f:
+ all_examples = json.load(f)
+ for example in all_examples:
+ if example.get("name") == example_name:
+ return example
+ raise ValueError(f"Example with name '{example_name}' not found in prompt_examples.json")
+
+ def run(self, context: Dict[str, Any]) -> Dict[str, Any]:
+ """Executes the core logic of prompt generation."""
+ source_text = context.get("source_text")
+ scenario = context.get("scenario")
+ example_name = context.get("example_name")
+
+ if not all([source_text, scenario, example_name]):
+ raise ValueError("Missing required context: source_text, scenario, or example_name.")
+ few_shot_example = self._load_few_shot_example(example_name)
+
+ meta_prompt = prompt_tpl.generate_extract_prompt_template.format(
+ few_shot_text=few_shot_example.get('text', ''),
+ few_shot_prompt=few_shot_example.get('prompt', ''),
+ user_text=source_text,
+ user_scenario=scenario
+ )
+ log.debug("Meta-prompt sent to LLM: %s", meta_prompt)
+ generated_prompt = self.llm.generate(prompt=meta_prompt)
+ log.debug("Generated prompt from LLM: %s", generated_prompt)
+
+ context["generated_extract_prompt"] = generated_prompt
+ return context
diff --git a/hugegraph-llm/src/hugegraph_llm/resources/prompt_examples/prompt_examples.json b/hugegraph-llm/src/hugegraph_llm/resources/prompt_examples/prompt_examples.json
new file mode 100644
index 000000000..f3bd33c37
--- /dev/null
+++ b/hugegraph-llm/src/hugegraph_llm/resources/prompt_examples/prompt_examples.json
@@ -0,0 +1,26 @@
+[
+ {
+ "name": "Official Person-Relationship Extraction",
+ "description": "A standard template for extracting Person and Webpage entities, along with their relationships (Roommate, Owns), from descriptive text.",
+ "text": "Meet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist. Additionally, Sarah is the proud owner of the website www.sarahsplace.com.",
+ "prompt": "## Main Task\nGiven the following graph schema and a piece of text, your task is to analyze the text and extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.\n\n## Basic Rules:\n### Schema Format:\nGraph Schema:\n- \"vertices\": [List of vertex labels and their properties]\n- \"edges\": [List of edge labels, their source and target vertex labels, and properties]\n\n### Content Rule:\nPlease read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema.\nYou are not allowed to modify the schema contraints. Your task is to format the provided information into the required schema, without missing any keyword.\nFor each piece of information that matches a vertex or edge, format it strictly according to the following JSON structures:\n\n#### Vertex Format:\n{\"id\":\"vertexLabelID:entityName\",\"label\":\"vertexLabel\",\"type\":\"vertex\",\"properties\":{\"propertyName\":\"propertyValue\", ...}}\n\n#### Edge Format:\n{\"id\":\"vertexlabelID:pk1!pk2!pk3\", \"label\":\"edgeLabel\",\"type\":\"edge\",\"outV\":\"sourceVertexId\",\"outVLabel\":\"sourceVertexLabel\",\"inV\":\"targetVertexId\",\"inVLabel\":\"targetVertexLabel\",\"properties\":{\"propertyName\":\"propertyValue\",...}}\n\nStrictly follow these rules:\n1. Don't extract property fields or labels that doesn't exist in the given schema. Do not generate new information.\n2. Ensure the extracted property set in the same type as the given schema (like 'age' should be a number, 'select' should be a boolean).\n3. If there are multiple primary keys, the strategy for generating VID is: vertexlabelID:pk1!pk2!pk3 (pk means primary key, and '!' is the separator). This id must be generated ONLY if there are multiple primary keys. If there is only one primary key, the strategy for generating VID is: int (sequencially increasing).\n4. 
Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema.\n5. Translate the schema fields into Chinese if the given text input is Chinese (Optional)\n\n## Example:\n### Input example:\n#### text:\nMeet Sarah, a 30-year-old attorney, and her roommate, James, whom she's shared a home with since 2010. James, in his professional life, works as a journalist.\n\n#### graph schema example:\n{\"vertices\":[{\"vertex_label\":\"person\",\"properties\":[\"name\",\"age\",\"occupation\"]}], \"edges\":[{\"edge_label\":\"roommate\", \"source_vertex_label\":\"person\",\"target_vertex_label\":\"person\",\"properties\":[\"date\"]}]}\n\n### Output example:\n{\"vertices\":[{\"id\":\"1:Sarah\",\"label\":\"person\",\"type\":\"vertex\",\"properties\":{\"name\":\"Sarah\",\"age\":30,\"occupation\":\"attorney\"}},{\"id\":\"1:James\",\"label\":\"person\",\"type\":\"vertex\",\"properties\":{\"name\":\"James\",\"occupation\":\"journalist\"}}], \"edges\":[{\"label\":\"roommate\",\"type\":\"edge\",\"outV\":\"1:Sarah\",\"outVLabel\":\"person\",\"inV\":\"1:James\",\"inVLabel\":\"person\",\"properties\":{\"date\":\"2010\"}}]}"
+ },
+ {
+ "name": "Traffic Accident Element Extraction",
+ "description": "Extracts key elements from a traffic accident report, including persons involved, vehicles, and responsibilities.",
+ "text": "On March 15, 2024, John Smith, driving a red Porsche with license plate NY-88888, collided with a scooter ridden by Mike Lee at the intersection of People's Road and Liberation Road. The collision resulted in a fracture in Mike Lee's right leg. The traffic police determined that John Smith was fully responsible for running a red light.",
+ "prompt": "## Main Task\nGiven the following graph schema and a piece of text about a traffic accident, your task is to extract information that fits into the schema's structure, formatting the information into vertices and edges as specified.\n\n## Basic Rules:\n### Schema Format:\nGraph Schema:\n- \"vertices\": [List of vertex labels and their properties]\n- \"edges\": [List of edge labels, their source and target vertex labels, and properties]\n\n### Content Rule:\nPlease read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema.\nYou are not allowed to modify the schema contraints. Your task is to format the provided information into the required schema, without missing any keyword.\nFor each piece of information that matches a vertex or edge, format it strictly according to the following JSON structures:\n\n#### Vertex Format:\n{\"id\":\"vertexLabelID:entityName\",\"label\":\"vertexLabel\",\"type\":\"vertex\",\"properties\":{\"propertyName\":\"propertyValue\", ...}}\n\n#### Edge Format:\n{\"id\":\"vertexlabelID:pk1!pk2!pk3\", \"label\":\"edgeLabel\",\"type\":\"edge\",\"outV\":\"sourceVertexId\",\"outVLabel\":\"sourceVertexLabel\",\"inV\":\"targetVertexId\",\"inVLabel\":\"targetVertexLabel\",\"properties\":{\"propertyName\":\"propertyValue\",...}}\n\nStrictly follow these rules:\n1. Don't extract property fields or labels that doesn't exist in the given schema. Do not generate new information.\n2. Ensure the extracted property set in the same type as the given schema (like 'age' should be a number, 'select' should be a boolean).\n3. 
Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema.\n\n## Example:\n### Input example:\n#### text:\nOn March 15, 2024, John Smith, driving a red Porsche with license plate NY-88888, collided with a scooter ridden by Mike Lee at the intersection of People's Road and Liberation Road. The collision resulted in a fracture in Mike Lee's right leg. The traffic police determined that John Smith was fully responsible for running a red light.\n\n#### graph schema example:\n{\"vertices\":[{\"vertex_label\":\"Person\",\"properties\":[\"name\",\"injury_level\"]},{\"vertex_label\":\"Vehicle\",\"properties\":[\"license_plate\",\"type\",\"color\"]},{\"vertex_label\":\"Accident\",\"properties\":[\"date\",\"location\",\"responsible_party\"]}], \"edges\":[{\"edge_label\":\"drives\",\"source_label\":\"Person\",\"target_label\":\"Vehicle\"},{\"edge_label\":\"participated_in\",\"source_label\":\"Person\",\"target_label\":\"Accident\"}]}\n\n### Output example:\n{\"vertices\":[{\"id\":\"1:John Smith\",\"label\":\"Person\",\"type\":\"vertex\",\"properties\":{\"name\":\"John Smith\"}},{\"id\":\"1:Mike Lee\",\"label\":\"Person\",\"type\":\"vertex\",\"properties\":{\"name\":\"Mike Lee\",\"injury_level\":\"right leg fracture\"}},{\"id\":\"2:NY-88888\",\"label\":\"Vehicle\",\"type\":\"vertex\",\"properties\":{\"license_plate\":\"NY-88888\",\"type\":\"Porsche\",\"color\":\"red\"}}],\"edges\":[{\"label\":\"drives\",\"type\":\"edge\",\"outV\":\"1:John Smith\",\"outVLabel\":\"Person\",\"inV\":\"2:NY-88888\",\"inVLabel\":\"Vehicle\",\"properties\":{}}]}"
+ },
+ {
+ "name": "Financial Event Extraction",
+ "description": "Extracts key financial information such as companies, acquisition events, and amounts from financial news.",
+ "text": "Tech giant Company A announced yesterday that it will fully acquire startup Company B, which operates in the artificial intelligence sector, for a price of $2 billion. The acquisition is expected to be completed by the end of the year.",
+ "prompt": "## Main Task\nGiven the following graph schema and a piece of financial news, your task is to extract information about corporate mergers and acquisitions.\n\n## Basic Rules:\n### Schema Format:\nGraph Schema:\n- \"vertices\": [List of vertex labels and their properties]\n- \"edges\": [List of edge labels, their source and target vertex labels, and properties]\n\n### Content Rule:\nPlease read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema.\nYou are not allowed to modify the schema contraints. Your task is to format the provided information into the required schema, without missing any keyword.\nFor each piece of information that matches a vertex or edge, format it strictly according to the following JSON structures:\n\n#### Vertex Format:\n{\"id\":\"vertexLabelID:entityName\",\"label\":\"vertexLabel\",\"type\":\"vertex\",\"properties\":{\"propertyName\":\"propertyValue\", ...}}\n\n#### Edge Format:\n{\"id\":\"vertexlabelID:pk1!pk2!pk3\", \"label\":\"edgeLabel\",\"type\":\"edge\",\"outV\":\"sourceVertexId\",\"outVLabel\":\"sourceVertexLabel\",\"inV\":\"targetVertexId\",\"inVLabel\":\"targetVertexLabel\",\"properties\":{\"propertyName\":\"propertyValue\",...}}\n\nStrictly follow these rules:\n1. Don't extract property fields or labels that doesn't exist in the given schema. Do not generate new information.\n2. Ensure the extracted property set in the same type as the given schema (like 'age' should be a number, 'select' should be a boolean).\n3. Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema.\n\n## Example:\n### Input example:\n#### text:\nTech giant Company A announced yesterday that it will fully acquire startup Company B, which operates in the artificial intelligence sector, for a price of $2 billion. 
The acquisition is expected to be completed by the end of the year.\n\n#### graph schema example:\n{\"vertices\":[{\"vertex_label\":\"Company\",\"properties\":[\"name\",\"field\"]},{\"vertex_label\":\"Acquisition\",\"properties\":[\"price\",\"currency\",\"status\"]}], \"edges\":[{\"edge_label\":\"acquirer\",\"source_label\":\"Acquisition\",\"target_label\":\"Company\"},{\"edge_label\":\"acquired\",\"source_label\":\"Acquisition\",\"target_label\":\"Company\"}]}\n\n### Output example:\n{\"vertices\":[{\"id\":\"1:Company A\",\"label\":\"Company\",\"type\":\"vertex\",\"properties\":{\"name\":\"Company A\",\"field\":\"Tech\"}},{\"id\":\"1:Company B\",\"label\":\"Company\",\"type\":\"vertex\",\"properties\":{\"name\":\"Company B\",\"field\":\"artificial intelligence\"}},{\"id\":\"2:A acquires B\",\"label\":\"Acquisition\",\"type\":\"vertex\",\"properties\":{\"price\":2000000000,\"currency\":\"USD\",\"status\":\"expected to be completed by year-end\"}}],\"edges\":[{\"label\":\"acquirer\",\"type\":\"edge\",\"outV\":\"2:A acquires B\",\"outVLabel\":\"Acquisition\",\"inV\":\"1:Company A\",\"inVLabel\":\"Company\",\"properties\":{}},{\"label\":\"acquired\",\"type\":\"edge\",\"outV\":\"2:A acquires B\",\"outVLabel\":\"Acquisition\",\"inV\":\"1:Company B\",\"inVLabel\":\"Company\",\"properties\":{}}]}"
+ },
+ {
+ "name": "Medical Diagnosis Extraction",
+ "description": "Extracts patients, symptoms, diagnosis results, and recommended drugs from medical record text.",
+ "text": "Patient Li Hua, presents with a headache and fever for three days. After examination, the diagnosis is a viral cold. It is recommended to take the drug 'Gankang' for treatment.",
+ "prompt": "## Main Task\nGiven the following graph schema and a piece of medical record, your task is to extract entities and relationships related to diagnosis and treatment.\n\n## Basic Rules:\n### Schema Format:\nGraph Schema:\n- \"vertices\": [List of vertex labels and their properties]\n- \"edges\": [List of edge labels, their source and target vertex labels, and properties]\n\n### Content Rule:\nPlease read the provided text carefully and identify any information that corresponds to the vertices and edges defined in the schema.\nYou are not allowed to modify the schema contraints. Your task is to format the provided information into the required schema, without missing any keyword.\nFor each piece of information that matches a vertex or edge, format it strictly according to the following JSON structures:\n\n#### Vertex Format:\n{\"id\":\"vertexLabelID:entityName\",\"label\":\"vertexLabel\",\"type\":\"vertex\",\"properties\":{\"propertyName\":\"propertyValue\", ...}}\n\n#### Edge Format:\n{\"id\":\"vertexlabelID:pk1!pk2!pk3\", \"label\":\"edgeLabel\",\"type\":\"edge\",\"outV\":\"sourceVertexId\",\"outVLabel\":\"sourceVertexLabel\",\"inV\":\"targetVertexId\",\"inVLabel\":\"targetVertexLabel\",\"properties\":{\"propertyName\":\"propertyValue\",...}}\n\nStrictly follow these rules:\n1. Don't extract property fields or labels that doesn't exist in the given schema. Do not generate new information.\n2. Ensure the extracted property set in the same type as the given schema (like 'age' should be a number, 'select' should be a boolean).\n3. Output in JSON format, only include vertexes and edges & remove empty properties, extracted and formatted based on the text/rules and schema.\n\n## Example:\n### Input example:\n#### text:\nPatient Li Hua, presents with a headache and fever for three days. After examination, the diagnosis is a viral cold. 
It is recommended to take the drug 'Gankang' for treatment.\n\n#### graph schema example:\n{\"vertices\":[{\"vertex_label\":\"Patient\",\"properties\":[\"name\"]},{\"vertex_label\":\"Symptom\",\"properties\":[\"name\"]},{\"vertex_label\":\"Diagnosis\",\"properties\":[\"name\"]},{\"vertex_label\":\"Drug\",\"properties\":[\"name\"]}], \"edges\":[{\"edge_label\":\"has_symptom\",\"source_label\":\"Patient\",\"target_label\":\"Symptom\"},{\"edge_label\":\"diagnosed_with\",\"source_label\":\"Patient\",\"target_label\":\"Diagnosis\"},{\"edge_label\":\"recommends_drug\",\"source_label\":\"Diagnosis\",\"target_label\":\"Drug\"}]}\n\n### Output example:\n{\"vertices\":[{\"id\":\"1:Li Hua\",\"label\":\"Patient\",\"type\":\"vertex\",\"properties\":{\"name\":\"Li Hua\"}},{\"id\":\"2:headache\",\"label\":\"Symptom\",\"type\":\"vertex\",\"properties\":{\"name\":\"headache\"}},{\"id\":\"2:fever\",\"label\":\"Symptom\",\"type\":\"vertex\",\"properties\":{\"name\":\"fever\"}},{\"id\":\"3:viral cold\",\"label\":\"Diagnosis\",\"type\":\"vertex\",\"properties\":{\"name\":\"viral cold\"}},{\"id\":\"4:Gankang\",\"label\":\"Drug\",\"type\":\"vertex\",\"properties\":{\"name\":\"Gankang\"}}],\"edges\":[{\"label\":\"has_symptom\",\"type\":\"edge\",\"outV\":\"1:Li Hua\",\"outVLabel\":\"Patient\",\"inV\":\"2:headache\",\"inVLabel\":\"Symptom\",\"properties\":{}},{\"label\":\"diagnosed_with\",\"type\":\"edge\",\"outV\":\"1:Li Hua\",\"outVLabel\":\"Patient\",\"inV\":\"3:viral cold\",\"inVLabel\":\"Diagnosis\",\"properties\":{}},{\"label\":\"recommends_drug\",\"type\":\"edge\",\"outV\":\"3:viral cold\",\"outVLabel\":\"Diagnosis\",\"inV\":\"4:Gankang\",\"inVLabel\":\"Drug\",\"properties\":{}}]}"
+ }
+]