Diff of /src/prompt.py [000000] .. [014e6e]

Switch to unified view

a b/src/prompt.py
1
#!/usr/bin/env python
2
# -*- coding: UTF-8 -*-
3
'''
4
@Project :Auto-BioinfoGPT 
5
@File    :prompt.py
6
@Author  :Juexiao Zhou
7
@Contact : juexiao.zhou@gmail.com
8
@Date    :2023/5/2 11:07 
9
'''
10
from copy import deepcopy
11
from src.build_RAG_private import retrive
12
import time
13
14
class PromptGenerator:
    """Build the prompt dictionaries exchanged with the language model.

    The generator keeps the running history summary, the global and current
    goals and the task plan, and turns them into prompt dictionaries. It also
    provides helpers that print prompts/responses to the terminal and return
    the same text as a string.
    """

    def __init__(self, blacklist='', engine = None, rag = False, retriever = None):
        """Initialise an empty conversation state.

        :param blacklist: comma-separated names of software that must not be
            used; surrounding whitespace is stripped and empty entries are
            dropped, so ``''`` (or ``None``) yields an empty list
        :param engine: stored as ``self.engine``; not used inside this class
        :param rag: when true, ``get_prompt`` queries ``retriever`` for rounds
            after the first one
        :param retriever: object handed to ``retrive`` when ``rag`` is true
        """
        self.history_summary = ''
        self.current_goal = None
        self.global_goal = None
        self.tasks = None
        self.engine = engine
        self.rag = rag
        self.retriever = retriever
        # ''.split(',') would give [''] and 'a, b' would keep the leading
        # space, so normalise the entries before storing them.
        self.blacklist = [
            name.strip()
            for name in (blacklist or '').split(',')
            if name.strip()
        ]
        # NOTE: not referenced by any prompt built in this class.
        self.speciallist = ['sra-toolkit: mamba install sra-tools',
                            'trim_galore: mamba install trim-galore']

    def get_executor_prompt(self, executor_info):
        """Return the prompt asking the model to judge a script's log output.

        :param executor_info: log output of the executed Bash script
        :return: prompt dictionary with the keys ``task``, ``rules``,
            ``log output`` and ``fixed format``
        """
        return {
            "task": "I executed a Bash script and obtained log output detailing its execution. Kindly assist me in assessing the success of the script. If it encounters any failures, please aid in summarizing the reasons for the failure and propose modifications to the code.",
            "rules": [
                "You should only respond in JSON format with fixed format.",
                "Your JSON response should only be enclosed in double quotes.",
                # The comma after this rule matters: without it Python joins
                # this string with the next one into a single rule.
                "No such file or directory is error.",
                "You should not write anything outside {}.",
                "You should make your answer as detailed as possible.",
            ],
            "log output": [
                executor_info
            ],
            "fixed format": {
                "stat": "0 or 1, 0 indicates failure and 1 indicates success",
                "info": "summarize errors in one sentence."
            }
        }

    def get_prompt(self, data_list, goal_description, global_round, execute_success=True, execute_info=None, last_execute_code=None):
        """Build the planning prompt (round 0) or a task prompt (later rounds).

        :param data_list: ['data path: data description']
        :param goal_description: 'goal'; stored as the current goal and, in
            round 0, also as the global goal
        :param global_round: int, 0 selects the planning prompt
        :param execute_success: when false (and ``global_round`` is not 0) the
            previous code and its error are appended to the prompt's history
        :param execute_info: error description of the failed execution
        :param last_execute_code: code that was generated previously
        :return: prompt dictionary
        """
        self.current_goal = goal_description
        if global_round == 0:
            self.global_goal = goal_description
            return self._build_plan_prompt(data_list)

        if self.rag:
            retriever_info = retrive(self.retriever,
                                     retriever_prompt=f'{self.current_goal}')
        else:
            retriever_info = ''
        final_prompt = self._build_task_prompt(data_list, retriever_info)
        if not execute_success:
            # Only the returned prompt is extended; self.history_summary is a
            # string and therefore stays unchanged.
            final_prompt['history'] += f' You previously generated codes: {last_execute_code}. However, your code has errors and you should fix them: {execute_info}.'
        return final_prompt

    def _build_plan_prompt(self, data_list):
        """Return the round-0 prompt that asks for a step-by-step plan."""
        return {
            "role": "Act as a bioinformatician, the rules must be strictly followed!",
            "rules": [
                "When acting as a bioinformatician, you strictly cannot stop acting as a bioinformatician.",
                "All rules must be followed strictly.",
                "You should use information in input to write a detailed plan to finish your goal.",
                f"You should include the software name and should not use those software: {self.blacklist}.",
                "You should only respond in JSON format with my fixed format.",
                "Your JSON response should only be enclosed in double quotes and you can have only one JSON in your response.",
                "You should not write loading data as a separate step.",
                "You should not write anything else except for your JSON response.",
                "You should make your answer as detailed as possible."
            ],
            "input": [
                "You have the following information in a list with the format file path: file description. I provide those files to you, so you don't need to prepare the data.",
                data_list
            ],
            "goal": self.current_goal,
            "fixed format for JSON response": {
                "plan": [
                    "Your detailed step-by-step sub-tasks in a list to finish your goal in the format: use some tool to do some task."
                ]
            }
        }

    def _build_task_prompt(self, data_list, retriever_info):
        """Return the prompt that asks for bash code solving the current task."""
        return {
            "role": "Act as a bioinformatician, the rules must be strictly followed!",
            "rules": [
                "When acting as a bioinformatician, you strictly cannot stop acting as a bioinformatician.",
                "All rules must be followed strictly.",
                # The comma after this rule matters: without it Python joins
                # this string with the next one into a single rule.
                "You are provided a system with specified constraints.",
                "The history of what you have done is provided, you should take the name changes of some files into account, or use some output from previous steps.",
                "You should use all information you have to write bash codes to finish your current task.",
                "All code requirements must be followed strictly when you write codes.",
                "You should only respond in JSON format with my fixed format.",
                "Your JSON response should only be enclosed in double quotes.",
                "You should make your answer as simple as possible.",
                "You should not write anything else except for your JSON response.",
                'You should use full absolute path for all files.',
            ],
            "system": [
                "You have a Ubuntu 18.04 system",
                "You have a mamba environment named abc_runtime",
                "You do not have any other software installed"
            ],
            "input": [
                "You have the following information in a list with the format file path: file description. I provide those files to you, so you don't need to prepare the data.",
                data_list
            ],
            "history": self.history_summary,
            "current task": self.current_goal,
            "code requirement": [
                f"You should not use those software: {self.blacklist}.",
                "You should not create and activate the mamba environment abc_runtime.",
                'You should install dependencies and software you need to use with mamba or pip with -y.',
                'You should pay attention to the number of input files and do not miss any.',
                'You should process each file independently and can not use FOR loop.',
                'You should use the default values for all parameters that are not specified.',
                'You should not repeat what you have done in history.',
                'You should only use software directly you installed with mamba or pip.',
                'If you use Rscript -e, you should make sure all variables exist in your command, otherwise, you need to check your history to repeat previous steps and generate those variables.',
                "You should not write anything else except for your JSON response.",
                "If RAG is provided, you should use it as template to write codes. You should not copy the RAG directly."
            ],
            "RAG: If provided, you should replace <...> with correct values and file paths based on information in history": retriever_info,
            "fixed format for JSON response": {
                "tool": "name of the tool you use",
                "code": "bash code to finish the current task in one line."
            }
        }

    def set_tasks(self, tasks):
        """Store an independent deep copy of ``tasks`` as the current plan."""
        self.tasks = deepcopy(tasks)

    def slow_print(self, input_string, speed=0.01):
        """Print ``str(input_string)`` one character at a time.

        :param input_string: object to print; converted with ``str``
        :param speed: seconds to sleep after each character
        """
        for char in str(input_string):
            # Print each character with end='' so that no newline is emitted
            # between characters.
            try:
                print(char, end='', flush=True)
            except Exception:
                # Best-effort fallback: retry without flushing. A bare
                # ``except`` would also swallow KeyboardInterrupt/SystemExit.
                print(char, end='')
            time.sleep(speed)
        print()

    def _emit_items(self, items, speed, colorize):
        """Slow-print every key/value pair of ``items`` and return the text.

        :param items: mapping to print
        :param speed: per-character delay forwarded to ``slow_print``
        :param colorize: wrap keys in the blue ANSI escape sequence when true
        :return: the pairs rendered as ``"<key> \\n\\n <value> \\n\\n"`` blocks
        """
        chunks = []
        for key in items:
            label = f"\033[34m{key}\033[0m" if colorize else f"{key}"
            self.slow_print(label, speed=speed)
            self.slow_print(items[key], speed=speed)
            chunks.append(f"{label} \n\n {items[key]} \n\n")
        return ''.join(chunks)

    def format_user_prompt(self, prompt, global_round, gui_mode):
        """Print a user prompt and return the printed content as a string.

        :param prompt: prompt dictionary to display
        :param global_round: round number shown in the header
        :param gui_mode: when true, plain text is produced; otherwise ANSI
            colour escape sequences are added
        :return: round header followed by every key/value pair
        """
        if gui_mode:
            print(f'[Round {global_round}]')
            print(f'[USER]')
            header = f'[Round {global_round}] \n\n'
        else:
            print(f'\033[31m[Round {global_round}]\033[0m')
            print(f'\033[32m[USER]\033[0m')
            header = f'\033[31m[Round {global_round}]\033[0m \n\n'
        INFO_STR = header + self._emit_items(prompt, speed=0.001, colorize=not gui_mode)
        print()
        return INFO_STR

    def format_ai_response(self, response_message, gui_mode):
        """Print a model response and return the printed pairs as a string.

        :param response_message: mapping holding the model's answer
        :param gui_mode: when true, plain text is produced; otherwise ANSI
            colour escape sequences are added
        :return: every key/value pair of ``response_message`` as text
        """
        if gui_mode:
            print(f'[AI]')
        else:
            print(f'\033[32m[AI]\033[0m')
        INFO_STR = self._emit_items(response_message, speed=0.01, colorize=not gui_mode)
        if gui_mode:
            print(f'-------------------------------------')
        else:
            print(f'\033[33m-------------------------------------\033[0m')
        print()
        return INFO_STR

    def add_history(self, task, global_round, data_list, code = None):
        """Append a description of the finished round to the history summary.

        :param task: task that was finished (used for rounds after round 0)
        :param global_round: int, 0 records the input, global goal and plan
        :param data_list: ['data path: data description'] (used in round 0)
        :param code: code that finished ``task``
        """
        if global_round == 0:
            self.history_summary += f"Firstly, you have input with the format 'file path: file description' in a list: {data_list}. You wrote a detailed plan to finish your goal. Your global goal is {self.global_goal}. Your plan is {self.tasks}. \n"
        else:
            self.history_summary += f"Then, you finished the task: {task} with code: {code}.\n"