Created
November 23, 2024 10:09
-
-
Save m2kar/b1c9436d297d26cd0d2229aaf6e18019 to your computer and use it in GitHub Desktop.
PoC/EXP分类的Prompt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Prompt head for the single-pass PoC/Exp scorer (check_poc_or_exp).
# Filled with str.format using the fields: abstract, sections, code, before,
# after. The last user turn is intentionally left open here; prompt_poc_tail
# starts with the matching <|im_end|> and is appended after the head has been
# shortened to the token budget.
prompt_poc = """
<|im_start|> system
You are an advanced vulnerability mining tool, which is specially used to extract key knowledge from vulnerability analysis text.
<|im_end|>
<|im_start|> user
Task: Your task is to analyze the vulnerability analysis text provided and determine whether there is partial or complete code of PoC or EXP in the text, including executable script code, constructed input function, network request, and shell code.
1. Determine whether the code segment aligns with the characteristics of a Vulnerability Proof of Concept (PoC) or a Vulnerability Exploitation (Exp) example.
2. Provide a confidence score from 1 to 5 (where 5 signifies high confidence) on your determination.
<|im_end|>
<|im_start|> system
Definition:
A Proof of Concept (PoC) is a demonstration of a vulnerability or exploit that shows how a system can be compromised. It is used to verify the existence of a vulnerability and demonstrate its impact.
A Vulnerability Exploitation (Exp) is the act of taking advantage of a vulnerability to compromise a system or gain unauthorized access.
types and features of PoC/Exp:
1. remote code execution (RCE): allows an attacker to execute arbitrary code on a target system, usually python, bash, or shell code.
2. local privilege escalation (LPE): allows an attacker to gain higher privileges on a target system.
3. denial of service (DoS): causes a system to become unresponsive or crash.
4. information disclosure: exposes sensitive information to unauthorized users.
5. CSRF, XSS, SQLi, SSRF, XXE, and other web vulnerabilities: allows an attacker to manipulate web applications to perform malicious actions.
6. usually use long and human unreadable strings, special characters, or encoded payloads for obfuscation.
<|im_end|>
<|im_start|> user
Prompt for Analyzing Vulnerability-related Blog Posts:
Summary of the Blog Post:
{abstract}
Sections of the Blog Containing the Code:
{sections}
Code Snippet for Analysis:
{code}
Text Before the Code Snippet:
{before}
Text After the Code Snippet:
{after}
"""
# Fixed suffix appended (without str.format, so the braces below are literal)
# to the shortened prompt_poc head: closes the open user turn, states the
# JSON output format and opens the assistant turn.
prompt_poc_tail = """
<|im_end|>
<|im_start|> system
output format:
Provide a json string with the key "score" and the value as the confidence score from 1 to 5, without any other content.
example: {"score": 4}
<|im_end|>
<|im_start|> system
{"score": 4}
<|im_end|>
<|im_start|> assistant
"""
import json | |
def check_poc_or_exp(html_file):
    """Decide whether an article contains PoC/Exp code using one LLM pass.

    Every snippet returned by ``htmltool.html2codes`` is sent to the LLM three
    times; the snippet's score is the mean of the responses that could be
    parsed as ``{"score": <int>}``, or 0 when none could be parsed.

    Args:
        html_file: Article handle passed unchanged to ``clean_html_file``,
            ``article_abstract`` and ``htmltool.html2codes``.

    Returns:
        ``"TRUE"`` when at least one snippet has a mean score >= 4.5,
        otherwise ``"FALSE"``. When no snippet is extracted, the result of
        ``check_poc_or_exp_old(html_file)`` is returned instead.
    """
    # NOTE(review): the cleaned content was never used by this function; the
    # call is kept only in case it has side effects — confirm and remove.
    clean_html_file(html_file)
    abstract = article_abstract(html_file)
    codes = htmltool.html2codes(html_file)
    # Graceful degradation: without extracted snippets fall back to the
    # older whole-article check.
    if not codes:
        return check_poc_or_exp_old(html_file)

    scores = []
    for code in codes:
        prompt_head = prompt_poc.format(
            abstract=abstract,
            sections=code.section,
            code=code.code,
            before=code.before,
            after=code.after,
        )
        # The prompt is identical for every attempt, so build it once.
        prompt = shorten_prompt(prompt_head, 7900) + prompt_poc_tail

        attempt_scores = []
        for _ in range(3):
            response = llm_api_mistral(prompt)
            time.sleep(FORCE_SLEEP)
            if response is None:
                continue
            try:
                debug(f"prompt: {prompt} \n response: {response}")
                attempt_scores.append(int(json.loads(response)["score"]))
            except Exception as e:
                # Best effort: an unparsable answer just does not count.
                error(f"Error occurred: {e}")

        if attempt_scores:
            scores.append(sum(attempt_scores) / len(attempt_scores))
            # Computed outside the f-string: backslashes inside replacement
            # fields are a SyntaxError before Python 3.12.
            preview = code.code[:100].replace("\n", " ")
            debug(
                "## check_poc_or_exp:\n"
                f"html_file: {html_file}\n"
                f"code: {preview}\n"
                f"score: {scores[-1]}"
            )
        else:
            scores.append(0)

    if DEBUG:
        debug(f"""## check_poc_or_exp: scores: {scores}""")
        for code, score in zip(codes, scores):
            preview = code.code[:200].replace("\n", " ")
            debug(f"""## score: {score}\t code: {preview}""")

    # The verdict depends on every snippet's score, not on the leftover loop
    # variable `score` (which is unbound when DEBUG is off).
    return "TRUE" if any(s >= 4.5 for s in scores) else "FALSE"
# Initial classification of PoC/Exp (first step of the prompt chain).
# Prompt head for chain step 1 (tagging) used by check_poc_or_exp_chain.
# Filled with str.format using the fields: abstract, sections, code, before,
# after. The final user turn is left open; prompt_poc_chain1_tail closes it
# and primes the assistant with "###OUTPUT###".
# The backslash before '#' in the example section name is written as "\\#"
# so the literal keeps the same runtime text without an invalid-escape warning.
prompt_poc_chain1_template = """
<|im_start|> system
You are an advanced vulnerability mining tool, which is specially used to extract key knowledge from vulnerability analysis text.
<|im_end|>
<|im_start|> user
Task: Your task is to analyze the code snippet in vulnerability analysis text provided and category the code from diverse perspectives with tags. The code snippet contains article abstracts, code snippet, the before and after text of the code, and the sections of the blog containing the code.
different perspective:
1. code language (single-choice): None, Log, Http Request, Assembly, Pseudocode , Bash, Python, Java, PHP, JavaScript, C, C++, Powershell, etc.
2. contains full code (single-choice): Full, Partial, None
3. contains code part (multi-choice): None, Payload, Shellcode, Constructed Input, Network Request, Function Definition, Struct Definition, Variable Definition, etc.
4. code action features (multi-choice): None, Execute Command, Network Request, Memory Operation, File Operation, Registry Operation, Pwn, Crypto, Reversing, etc.
5. code input (multi-choice): None, address, port, username, password, command, file path, registry key, shell code,payload, etc.
6. code hacker log print (contain "[+]/[-]/[*]/[!]") (single-choice) : Yes, No
7. malformed format (single-choice): Yes, No
8. Consecutive nonsense characters (single-choice): Yes, No
9. target crashed (single-choice): Yes, No
10. Executable code (single-choice): Yes, No
11. Vulnerability type (multi-choice): None, Remote Code Execution, Local Privilege Escalation, Denial of Service, Information Disclosure, CSRF, XSS, SQLi, SSRF, XXE, etc.
12. Vulnerability trigger (multi-choice): None, Buffer Overflow, Integer Overflow, SQL Injection, Command Injection, Path Traversal, Unrestricted File Upload, etc.
13. Vulnerability target's code language (single-choice): None, Bash, Python, Java, PHP, JavaScript, C, C++, Powershell, etc.
14. Poc/Exp mentioned in Abstract (single-choice): Yes, No
15. Poc/Exp mentioned in Before (single-choice): Yes, No
16. Poc/Exp mentioned in After (single-choice): Yes, No
17. Poc/Exp mentioned in Section Names (single-choice): Yes, No
18. Code Summary (text): "a brief summary of code, including the content, target, impact of the code."
<|im_end|>
<|im_start|> system
output format:
Provide a YAML object contains key-value pairs, values are lists or str,without any other content.
example:
###YAML_START###
1-code_language: JavaScript
2-contains_full_code: Partial
3-contains_code_part: [ "Constructed Input"]
4-code_action_features: ["Network Request", "Memory Operation"]
5-code_input: ["address", "port"]
6-code_hacker_log_print: No
7-malformed_format: Yes
8-Consecutive_nonsense_characters: Yes
9-target_crashed: Yes
10-Executable_code: Yes
11-Vulnerability_type: ["Use After Free"]
12-Vulnerability_trigger: ["Malformed XML Data"]
13-Vulnerability_target_code_language: ["C"]
14-PocExp_mentioned_in_Abstract: Yes
15-PocExp_mentioned_in_Before: Yes
16-PocExp_mentioned_in_After: Yes
17-PocExp_mentioned_in_SectionNames: No
18-CodeSummary: "contain a malformed xml, send to Adobe Acrobat DC, raise use-after-free vulnerability."
###YAML_END###
<|im_end|>
<|im_start|> user
Example Input:
Prompt for Analyzing Vulnerability-related Blog Posts:
Summary of the Blog Post:
This article discusses a vulnerability, identified as CVE-2020-3800, in Adobe Acrobat DC version 2019.008.20064 on Windows 10 64-bit. The vulnerability is triggered by executing specific JavaScript code that causes a use-after-free condition due to malformed XML data. Although the crash can be observed, the impact of this vulnerability is considered low because attempts to reallocate the freed object were unsuccessful. The vendor acknowledged the issue and released a patch to address it.
Sections of the Blog Containing the Code:
# Description of the vulnerability\\#
Code Snippet for Analysis:
```
xfa.loadXML('<document>\\na\\na\\na\\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
'\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
'\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
'\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
'\\na\\na\\na\\na\\na\n<![CDATA[\\n</document>')
```
Text Before the Code Snippet:
::: highlightAdobe Reader and Adobe Acrobat DC crashes after executing the following
Javascript code:Both Adobe Reader and Acrobat DC share the same `AcroForm.api` plugin:
File Version 19.012.20040.17853
Text After the Code Snippet:
:::The malformed XML data causes a use-after-free to occur. While there are
there are many indirect calls following this that can be used to control
EIP, many combinations of XML data and Javascript code was tested but
was unable to re-allocated back into the freed object hence the impact
of this vulnerability might be low.
Example Output:
###OUTPUT###
###YAML_START###
1-code_language: JavaScript
2-contains_full_code: Partial
3-contains_code_part: [ "Constructed Input"]
4-code_action_features: [ "Memory Operation"]
5-code_input: [None]
6-code_hacker_log_print: No
7-malformed_format: Yes
8-Consecutive_nonsense_characters: Yes
9-target_crashed: Yes
10-Executable_code: Yes
11-Vulnerability_type: ["Use After Free"]
12-Vulnerability_trigger: ["Malformed XML Data"]
13-Vulnerability_target_code_language: ["C"]
14-PocExp_mentioned_in_Abstract: Yes
15-PocExp_mentioned_in_Before: Yes
16-PocExp_mentioned_in_After: Yes
17-PocExp_mentioned_in_SectionNames: No
18-CodeSummary: "contain a malformed xml, send to Adobe Acrobat DC, raise use-after-free vulnerability."
###YAML_END###
<|im_end|>
<|im_start|> user
Prompt for Analyzing Vulnerability-related Blog Posts:
Summary of the Blog Post:
{abstract}
Sections of the Blog Containing the Code:
{sections}
Code Snippet for Analysis:
```
{code}
```
Text Before the Code Snippet:
{before}
Text After the Code Snippet:
{after}
"""
# Fixed suffix for chain step 1: closes the open user turn of
# prompt_poc_chain1_template and primes the assistant answer with the
# "###OUTPUT###" marker used in the in-prompt example.
prompt_poc_chain1_tail = """
<|im_end|>
<|im_start|> assistant
###OUTPUT###
"""
# Prompt head for chain step 2 (scoring) used by check_poc_or_exp_chain.
# Filled with str.format using the fields: chain1_result, abstract, sections,
# code, before, after. Literal JSON braces in the examples are doubled
# ("{{" / "}}") so that str.format emits them as single braces.
# The final user turn is left open; prompt_poc_chain2_tail closes it.
prompt_poc_chain2_template = """
<|im_start|> system
You are an advanced vulnerability mining tool, which is specially used to extract key knowledge from vulnerability analysis text.
<|im_end|>
<|im_start|> user
Task: Your task is to analyze the code snippet in vulnerability analysis text provided and determine whether there is partial or complete code of PoC or EXP in the text, including executable script code, constructed input function, network request, and shell code.
1. Determine whether the code segment aligns with the characteristics of a Vulnerability Proof of Concept (PoC) or a Vulnerability Exploitation (Exp) example.
2. Provide a confidence score from 0 to 9 (where 9 signifies high confidence) on your determination.
<|im_end|>
<|im_start|> system
Definition:
A Proof of Concept (PoC) is a demonstration of a vulnerability or exploit that shows how a system can be compromised. It is used to verify the existence of a vulnerability and demonstrate its impact.
A Vulnerability Exploitation (Exp) is the act of taking advantage of a vulnerability to compromise a system or gain unauthorized access.
Rules for determining whether a code snippet is PoC/Exp or No:
1. If the code is decompiled pseudo code or assembly code from Vulnerability_target, answer `0`.
2. If the code is from a part of Vulnerability_target and with same code language, answer `0`.
3. if the code can crash the Vulnerability_target, answer `9`.
4. If the code can execute arbitrary code on the Vulnerability_target, answer `9`.
5. If the code can gain higher privileges on the Vulnerability_target, answer `9`.
6. If the code can cause a system to become unresponsive or crash, answer `9`.
7. If the code can expose sensitive information to unauthorized users, answer `9`.
8. If the code can manipulate web applications to perform malicious actions, answer `9`.
9. If the code contains long and human unreadable strings, special characters, or encoded payloads for obfuscation, answer `9`.
10. If the code contains hacker log print (contain "[+]/[-]/[*]/[!]"), answer `5+`.
11. If the code contains Network Request, and the target crashed or the code is executable or get higher privileges or get shell, answer `9`.
12. If the code contains pwn part, answer `9`.
13. If the code contains crypto part, give a high score.
14. If Poc/Exp mentioned in Before or After or SectionNames, give a high score.
<|im_end|>
<|im_start|> system
output format:
Provide a json string with the key "score" and the value as the confidence score from 0 to 9,wrapped in delimiters, without any other content.
example:
###OUTPUT_START###
{{"score": 4}}
###OUTPUT_END###
<|im_end|>
<|im_start|> user
Example Input:
Prompt for Analyzing Vulnerability-related Blog Posts:
Summary of the Blog Post:
This article discusses a newly discovered vulnerability in Citrix Virtual Apps and Desktops, focusing on the "Session Recording" feature. The feature, designed to monitor and record user activity for compliance and troubleshooting, is found to have a critical flaw due to the use of insecure deserialization via the BinaryFormatter in .NET. The vulnerability arises from overly permissive access controls on the Microsoft Message Queuing (MSMQ) service, which allows unauthenticated users to send messages that can be deserialized and executed. The authors detail the architecture and communication mechanisms of the Session Recording feature, highlighting how the BinaryFormatter's known security issues, combined with the MSMQ configuration, create an exploitable condition. They also explore potential exploitation vectors, including leveraging existing gadgets and exploiting HTTP-to-MSMQ message queuing, to achieve remote code execution.
Sections of the Blog Containing the Code: None
## Explanation of the Packet Structure
Tags and summary of the code snippet by another code analyser:
1-code_language: Http Request
2-contains_full_code: Partial
3-contains_code_part: ["Network Request", "Constructed Input"]
4-code_action_features: ["Network Request"]
5-code_input: [None]
6-code_hacker_log_print: No
7-malformed_format: No
8-Consecutive_nonsense_characters: No
9-target_crashed: No
10-Executable_code: No
11-Vulnerability_type: ["Remote Code Execution"]
12-Vulnerability_trigger: ["Insecure Deserialization"]
13-Vulnerability_target_code_language: [".NET"]
14-PocExp_mentioned_in_Abstract: Yes
15-PocExp_mentioned_in_Before: No
16-PocExp_mentioned_in_After: No
17-PocExp_mentioned_in_SectionNames: No
18-CodeSummary: "Constructs and sends a multipart HTTP POST request to a Microsoft Message Queuing (MSMQ) server endpoint, which includes a SOAP envelope for routing and message metadata, and an associated binary MSMQ message payload. This setup is intended for pushing a message with specified priority and body type into an MSMQ queue named 'queue_name' on 'example.com', using SOAP actions for message handling."
Code Snippet for Analysis:
```
+--------------------------------------------------+
| HTTP Request Line |
| POST /msmq/queue_name HTTP/1.1 |
+--------------------------------------------------+
| Host: example.com |
| Content-Type: multipart/related; |
| boundary="MSMQ_SOAP_boundary_12345"; |
| type="text/xml" |
| Content-Length: 2100 |
| SOAPAction: "MSMQMessage" |
| Proxy-Accept: NonInteractiveClient |
+--------------------------------------------------+
--MSMQ_SOAP_boundary_12345
+----------------------------------------------------+
| Content-Type: text/xml; charset=UTF-8 |
| Content-Length: 800 |
+----------------------------------------------------+
| SOAP Envelope |
| <SOAP-ENV:Envelope xmlns:SOAP-ENV="http://... |
| <SOAP-ENV:Header> |
| <m:Routing xmlns:m="<http://schemas>..."> |
| <Action>SendMessage</Action> |
| <To><http://example.com/msmq/queue_name></To>|
| <MessageID>uuid:123456789</MessageID> |
| </m:Routing> |
| <m:Properties> |
| <ExpiresAt>20250101T123000</ExpiresAt> |
| <SentAt>20241107T100000</SentAt> |
...
| </SOAP-ENV:Body> |
| </SOAP-ENV:Envelope> |
--MSMQ_SOAP_boundary_12345
--MSMQ_SOAP_boundary_12345
+--------------------------------------------------+
| Content-Type: application/octet-stream |
| Content-Length: 1300 |
| Content-Id: uuid:123456789 |
+--------------------------------------------------+
| MSMQ Message |
| [Binary data or serialized object] |
+--------------------------------------------------+
--MSMQ_SOAP_boundary_12345--
```
Text Before the Code Snippet:
These elements work together to create a structured message that MSMQ
can route, prioritize, and deliver across networks. By understanding
this layout, we could assemble a complete packet that the MSMQ server
would accept.
Text After the Code Snippet:
</figure>
Example Output:
###OUTPUT_START###
{{"score": 9}}
###OUTPUT_END###
<|im_end|>
<|im_start|> user
Prompt for Analyzing Vulnerability-related Blog Posts:
Tags and summary of the code snippet by another code analyser:
{chain1_result}
Summary of the Blog Post:
{abstract}
Sections of the Blog Containing the Code:
{sections}
Code Snippet for Analysis:
{code}
Text Before the Code Snippet:
{before}
Text After the Code Snippet:
{after}
"""
# Fixed suffix for chain step 2: closes the open user turn of
# prompt_poc_chain2_template and primes the assistant answer with the
# "###OUTPUT_START###" delimiter that the caller strips before JSON parsing.
prompt_poc_chain2_tail = """
<|im_end|>
<|im_start|> assistant
###OUTPUT_START###
"""
# debug(f"prompt token num: {get_token_num(prompt_poc_chain1)+get_token_num(prompt_poc_chain1_tail)}") | |
def check_poc_or_exp_chain(html_file):
    """Decide whether an article contains PoC/Exp code using a two-step chain.

    For every snippet returned by ``htmltool.html2codes``:

    1. Chain step 1 asks the LLM for descriptive tags; the answer is used
       with its ``###OUTPUT###`` / ``###YAML_START###`` / ``###YAML_END###``
       markers removed, or as an empty string if all attempts fail.
    2. Chain step 2 asks the LLM for a 0-9 confidence score, giving it the
       step-1 tags as extra context; the score is 0 if all attempts fail.

    Each step is attempted at most three times.

    Args:
        html_file: Article handle passed unchanged to ``clean_html_file``,
            ``article_abstract`` and ``htmltool.html2codes``.

    Returns:
        ``"TRUE"`` when at least one snippet scores >= 7, otherwise
        ``"FALSE"`` (including when no snippet is extracted).
    """
    # NOTE(review): the cleaned content was never used by this function; the
    # call is kept only in case it has side effects — confirm and remove.
    clean_html_file(html_file)
    abstract = article_abstract(html_file)
    codes = htmltool.html2codes(html_file)

    max_retries = 3
    scores = []
    for code in codes:
        # ---- chain step 1: tag the snippet --------------------------------
        prompt_chain1 = shorten_prompt(
            prompt_poc_chain1_template.format(
                abstract=abstract,
                sections=code.section,
                code=code.code,
                before=code.before,
                after=code.after,
            ),
            7980 - get_token_num(prompt_poc_chain1_tail),
        ) + prompt_poc_chain1_tail

        chain1_result = ""  # used as-is when every attempt fails
        for _ in range(max_retries):
            try:
                response1 = llm_api_mistral(prompt_chain1)
                _log_poc_chain_exchange(html_file, 1, prompt_chain1, response1)
                chain1_result = (
                    response1.replace("###OUTPUT###", "")
                    .replace("###YAML_START###", "")
                    .replace("###YAML_END###", "")
                )
                break
            except Exception as e:
                # Also covers a None response (no .replace attribute).
                error(f"Error occurred: {e}")

        # ---- chain step 2: score the snippet ------------------------------
        prompt_chain2_head = prompt_poc_chain2_template.format(
            chain1_result=chain1_result,
            abstract=abstract,
            sections=code.section,
            code=code.code,
            before=code.before,
            after=code.after,
        )
        prompt_chain2 = shorten_prompt(
            prompt_chain2_head, 7980 - get_token_num(prompt_poc_chain2_tail)
        ) + prompt_poc_chain2_tail

        score = 0  # used as-is when every attempt fails
        for _ in range(max_retries):
            try:
                response2 = llm_api_mistral(prompt_chain2)
                _log_poc_chain_exchange(html_file, 2, prompt_chain2, response2)
                jstr = (
                    response2.replace("###OUTPUT_START###", "")
                    .replace("###OUTPUT_END###", "")
                    .replace("\n", "")
                )
                score = int(json.loads(jstr)["score"])
                break
            except Exception as e:
                error(f"Error occurred: {e}")
        scores.append(score)

    debug(f"""check_poc_or_exp_chain: html_file: {html_file}\t Scores: {scores}""")
    return "TRUE" if any(s >= 7 for s in scores) else "FALSE"


def _log_poc_chain_exchange(html_file, stage, prompt, response):
    """Append one prompt/response pair to the chain debug log when DEBUG is set.

    A failure to write the log (for example a missing ``./tmp`` directory) is
    reported through ``error`` and otherwise ignored, so that it cannot
    invalidate an LLM response that was received successfully.
    """
    if not DEBUG:
        return
    try:
        with open("./tmp/poc_chain1.txt", "a", encoding="utf-8") as f:
            f.write("#" * 50 + "\n")
            f.write(f"{html_file}\n")
            f.write(
                f"Prompt_chain{stage}:\n{prompt}\n\n"
                f"Response_chain{stage}:\n{response}\n"
            )
            f.write("\n")
    except OSError as e:
        error(f"Error occurred: {e}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment