m2kar · November 23, 2024 10:09
diff --git a/poc_prompt b/poc_prompt

 # Single-pass PoC/Exp scoring prompt (confidence 1-5).
 # check_poc_or_exp() fills {abstract}, {sections}, {code}, {before} and
 # {after} through str.format(), so the text must not contain any other
 # literal braces.
 # NOTE(review): the "Definition" system turn is never closed with
 # <|im_end|> before the next "<|im_start|> user" turn - confirm whether
 # that is intentional.
 prompt_poc = """
 <|im_start|> system
 You are an advanced vulnerability mining tool, which is specially used to extract key knowledge from vulnerability analysis text.
 <|im_end|>
 <|im_start|> user
 Task: Your task is to analyze the vulnerability analysis text provided and determine whether there is partial or complete code of PoC or EXP in the text, including executable script code, constructed input function, network request, and shell code.
 1. Determine whether the code segment aligns with the characteristics of a Vulnerability Proof of Concept (PoC) or a Vulnerability Exploitation (Exp) example.
 2. Provide a confidence score from 1 to 5 (where 5 signifies high confidence) on your determination.
 <|im_end|>
 <|im_start|> system
 Definition:
 A Proof of Concept (PoC) is a demonstration of a vulnerability or exploit that shows how a system can be compromised. It is used to verify the existence of a vulnerability and demonstrate its impact.
 A Vulnerability Exploitation (Exp) is the act of taking advantage of a vulnerability to compromise a system or gain unauthorized access.
 types and features of PoC/Exp: 
 1. remote code execution (RCE): allows an attacker to execute arbitrary code on a target system, usually python, bash, or shell code.
 2. local privilege escalation (LPE): allows an attacker to gain higher privileges on a target system.
 3. denial of service (DoS): causes a system to become unresponsive or crash.
 4. information disclosure: exposes sensitive information to unauthorized users.
 5. CSRF, XSS, SQLi, SSRF, XXE, and other web vulnerabilities: allows an attacker to manipulate web applications to perform malicious actions.
 6. usually use long and human unreadable strings, special characters, or encoded payloads for obfuscation.
 
 <|im_start|> user
 Prompt for Analyzing Vulnerability-related Blog Posts:

 Summary of the Blog Post:
 {abstract}

 Sections of the Blog Containing the Code: 
 {sections}

 Code Snippet for Analysis:
 {code}

 Text Before the Code Snippet:
 {before}

 Text After the Code Snippet:
 {after}

 """
 # Closing part of the single-pass prompt. check_poc_or_exp() appends it
 # verbatim (it is NOT passed through str.format(), which is why the
 # single braces in {"score": 4} are safe here).
 # NOTE(review): the second system turn contains only the example answer
 # {"score": 4}; it looks like a leftover few-shot answer - confirm.
 prompt_poc_tail = """
 <|im_end|>
 <|im_start|> system
 output format:
 Provide a json string with the key "score" and the value as the confidence score from 1 to 5, without any other content.
 example: {"score": 4}
 <|im_end|>
 <|im_start|> system
 {"score": 4}
 <|im_end|>
 <|im_start|> assistant
 """

 import json

 def check_poc_or_exp(html_file):
    """Decide whether an article contains PoC/Exp code, snippet by snippet.

    Each snippet returned by ``htmltool.html2codes`` is submitted to
    ``llm_api_mistral`` three times using ``prompt_poc`` (1-5 scale). The
    attempts whose answer parses as ``{"score": <int>}`` are averaged; a
    snippet with no parseable answer scores 0.

    Args:
        html_file: Article handle forwarded unchanged to ``clean_html_file``,
            ``article_abstract`` and ``htmltool.html2codes``.

    Returns:
        ``"TRUE"`` if any snippet's average score is at least 4.5, otherwise
        ``"FALSE"``. When no snippets are extracted, the result of
        ``check_poc_or_exp_old(html_file)`` is returned instead.
    """
    # The cleaned content is not used below; the call is kept in case the
    # helper has side effects. NOTE(review): drop it if it is pure.
    clean_html_file(html_file)
    abstract = article_abstract(html_file)
    codes = htmltool.html2codes(html_file)

    # Graceful degradation: without extracted snippets, use the old checker.
    if not codes:
        return check_poc_or_exp_old(html_file)

    scores = []
    for code in codes:
        prompt_head = prompt_poc.format(
            abstract=abstract,
            sections=code.section,
            code=code.code,
            before=code.before,
            after=code.after)
        # The prompt is identical for every attempt, so build it once.
        prompt = shorten_prompt(prompt_head, 7900) + prompt_poc_tail

        attempt_scores = []
        for _ in range(3):
            response = llm_api_mistral(prompt)
            time.sleep(FORCE_SLEEP)
            if response is None:
                continue
            try:
                debug(f"prompt: {prompt} \n response: {response}")
                attempt_scores.append(int(json.loads(response)["score"]))
            except Exception as e:
                # Best effort: an unparseable answer just drops this attempt.
                error(f"Error occurred: {e}")

        if attempt_scores:
            scores.append(sum(attempt_scores) / len(attempt_scores))
            # Computed outside the f-string: a backslash inside an f-string
            # expression is a SyntaxError before Python 3.12.
            snippet = code.code[:100].replace("\n", " ")
            debug(f"""## check_poc_or_exp:
            html_file: {html_file}
            code: {snippet}
            score: {scores[-1]}""")
        else:
            scores.append(0)

    if DEBUG:
        debug(f"""## check_poc_or_exp: scores: {scores}""")
        for code, code_score in zip(codes, scores):
            snippet = code.code[:200].replace("\n", " ")
            debug(f"""## score: {code_score}\t code: {snippet}""")

    # Gate on the whole score list. The previous `if score:` read a variable
    # that only exists when DEBUG is on (and then held just the last score).
    if any(s >= 4.5 for s in scores):
        return "TRUE"
    return "FALSE"

 # Initial classification of PoC/Exp (stage 1 of the two-stage chain).
 # check_poc_or_exp_chain() fills {abstract}, {sections}, {code}, {before}
 # and {after} through str.format(), so the text must not contain any other
 # literal braces. The model is asked for a YAML block wrapped in
 # ###YAML_START### / ###YAML_END###; the caller strips those markers and
 # ###OUTPUT### from the response before handing it to stage 2.
 # NOTE(review): item "10 Executable code" lacks the "." the other items
 # have, and the examples answer item 13 with a list although it is declared
 # single-choice - confirm whether either matters for the model.
 prompt_poc_chain1_template = """
 <|im_start|> system
 You are an advanced vulnerability mining tool, which is specially used to extract key knowledge from vulnerability analysis text.
 <|im_end|>
 <|im_start|> user
 Task: Your task is to analyze the code snippet in vulnerability analysis text provided and  category the code from diverse perspectives with tags. The code snippet contains article abstracts, code snippet, the before and after text of the code, and the sections of the blog containing the code.
 different perspective:
 1. code language (single-choice): None, Log, Http Request, Assembly, Pseudocode , Bash, Python, Java, PHP, JavaScript, C, C++, Powershell, etc. 
 2. contains full code (single-choice): Full, Partial, None
 3. contains code part (multi-choice): None, Payload, Shellcode, Constructed Input, Network Request, Function Definition, Struct Definition, Variable Definition, etc.
 4. code action features (multi-choice): None, Execute Command, Network Request, Memory Operation, File Operation, Registry Operation, Pwn, Crypto, Reversing, etc.
 5. code input (multi-choice): None, address, port, username, password, command, file path, registry key, shell code,payload, etc.
 6. code hacker log print (contain "[+]/[-]/[*]/[!]") (single-choice) : Yes, No
 7. malformed format (single-choice): Yes, No
 8. Consecutive nonsense characters (single-choice): Yes, No
 9. target crashed (single-choice): Yes, No
 10 Executable code (single-choice): Yes, No
 11. Vulnerability type (multi-choice): None, Remote Code Execution, Local Privilege Escalation, Denial of Service, Information Disclosure, CSRF, XSS, SQLi, SSRF, XXE, etc.
 12. Vulnerability trigger (multi-choice): None, Buffer Overflow, Integer Overflow, SQL Injection, Command Injection, Path Traversal, Unrestricted File Upload, etc.
 13. Vulnerability target's code language (single-choice): None, Bash, Python, Java, PHP, JavaScript, C, C++, Powershell, etc.
 14. Poc/Exp mentioned in Abstract (single-choice): Yes, No
 15. Poc/Exp mentioned in Before (single-choice): Yes, No
 16. Poc/Exp mentioned in After (single-choice): Yes, No
 17. Poc/Exp mentioned in Section Names (single-choice): Yes, No
 18. Code Summary (text): "a brief summary of code, including the content, target, impact of the code."
 <|im_end|>

 <|im_start|> system
 output format:
 Provide a YAML object contains key-value pairs, values are lists or str,without any other content.
 example: 

 ###YAML_START###
 1-code_language: JavaScript
 2-contains_full_code: Partial
 3-contains_code_part: [ "Constructed Input"]
 4-code_action_features: ["Network Request", "Memory Operation"]
 5-code_input: ["address", "port"]
 6-code_hacker_log_print: No
 7-malformed_format: Yes
 8-Consecutive_nonsense_characters: Yes
 9-target_crashed: Yes
 10-Executable_code: Yes
 11-Vulnerability_type: ["Use After Free"]
 12-Vulnerability_trigger: ["Malformed XML Data"]
 13-Vulnerability_target_code_language: ["C"]
 14-PocExp_mentioned_in_Abstract: Yes
 15-PocExp_mentioned_in_Before: Yes
 16-PocExp_mentioned_in_After: Yes
 17-PocExp_mentioned_in_SectionNames: No
 18-CodeSummary: "contain a malformed xml, send to Adobe Acrobat DC, raise use-after-free vulnerability."
 ###YAML_END###

 <|im_end|>

 <|im_start|> user
 Example Input: 
 Prompt for Analyzing Vulnerability-related Blog Posts:

 Summary of the Blog Post:
 This article discusses a vulnerability, identified as CVE-2020-3800, in Adobe Acrobat DC version 2019.008.20064 on Windows 10 64-bit. The vulnerability is triggered by executing specific JavaScript code that causes a use-after-free condition due to malformed XML data. Although the crash can be observed, the impact of this vulnerability is considered low because attempts to reallocate the freed object were unsuccessful. The vendor acknowledged the issue and released a patch to address it.

 Sections of the Blog Containing the Code: 
 # Description of the vulnerability\#

 Code Snippet for Analysis:
 ```
 xfa.loadXML('<document>\\na\\na\\na\\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
 '\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
 '\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
 '\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
 '\\na\\na\\na\\na\\na\n<![CDATA[\\n</document>')
 ```

 Text Before the Code Snippet:
 ::: highlightAdobe Reader and Adobe Acrobat DC crashes after executing the following
 Javascript code:Both Adobe Reader and Acrobat DC share the same `AcroForm.api` plugin:
 File Version 19.012.20040.17853

 Text After the Code Snippet:
 :::The malformed XML data causes a use-after-free to occur. While there are
 there are many indirect calls following this that can be used to control
 EIP, many combinations of XML data and Javascript code was tested but
 was unable to re-allocated back into the freed object hence the impact
 of this vulnerability might be low.

 Example Output: 
 ###OUTPUT###
 ###YAML_START###
 1-code_language: JavaScript
 2-contains_full_code: Partial
 3-contains_code_part: [ "Constructed Input"]
 4-code_action_features: [ "Memory Operation"]
 5-code_input: [None]
 6-code_hacker_log_print: No
 7-malformed_format: Yes
 8-Consecutive_nonsense_characters: Yes
 9-target_crashed: Yes
 10-Executable_code: Yes
 11-Vulnerability_type: ["Use After Free"]
 12-Vulnerability_trigger: ["Malformed XML Data"]
 13-Vulnerability_target_code_language: ["C"]
 14-PocExp_mentioned_in_Abstract: Yes
 15-PocExp_mentioned_in_Before: Yes
 16-PocExp_mentioned_in_After: Yes
 17-PocExp_mentioned_in_SectionNames: No
 18-CodeSummary: "contain a malformed xml, send to Adobe Acrobat DC, raise use-after-free vulnerability."
 ###YAML_END###
 <|im_end|>
 <|im_start|> user
 Prompt for Analyzing Vulnerability-related Blog Posts:

 Summary of the Blog Post:
 {abstract}

 Sections of the Blog Containing the Code: 
 {sections}

 Code Snippet for Analysis:
 ```
 {code}
 ```
 Text Before the Code Snippet:
 {before}

 Text After the Code Snippet:
 {after}

 """
 # Closing part of the stage-1 prompt, appended verbatim after the head has
 # been shortened. It opens the assistant turn and pre-fills the
 # ###OUTPUT### marker used in the example output.
 prompt_poc_chain1_tail = """
 <|im_end|>

 <|im_start|> assistant
 ###OUTPUT###

 """

 # Stage 2 of the two-stage chain: PoC/Exp confidence score (0-9) given the
 # stage-1 tags. check_poc_or_exp_chain() fills {chain1_result}, {abstract},
 # {sections}, {code}, {before} and {after} through str.format(); the example
 # JSON therefore uses doubled braces ({{ }}) so it survives formatting.
 # NOTE(review): the "output format" system turn and the "Example Input"
 # user turn are never closed with <|im_end|> before the next
 # "<|im_start|>" - confirm whether that is intentional.
 prompt_poc_chain2_template = """
 <|im_start|> system
 You are an advanced vulnerability mining tool, which is specially used to extract key knowledge from vulnerability analysis text.
 <|im_end|>
 <|im_start|> user
 Task: Your task is to analyze the code snippet in vulnerability analysis text provided and determine whether there is partial or complete code of PoC or EXP in the text, including executable script code, constructed input function, network request, and shell code.
 1. Determine whether the code segment aligns with the characteristics of a Vulnerability Proof of Concept (PoC) or a Vulnerability Exploitation (Exp) example.
 2. Provide a confidence score from 0 to 9 (where 9 signifies high confidence) on your determination.
 <|im_end|>
 <|im_start|> system
 Definition:
 A Proof of Concept (PoC) is a demonstration of a vulnerability or exploit that shows how a system can be compromised. It is used to verify the existence of a vulnerability and demonstrate its impact.
 A Vulnerability Exploitation (Exp) is the act of taking advantage of a vulnerability to compromise a system or gain unauthorized access.
 Rules for determining whether a code snippet is PoC/Exp or No:
 1. If the code is decompiled pseudo code or assembly code from Vulnerability_target, answer `0`.
 2. If the code is from a part of Vulnerability_target and with same code language, answer `0`.
 3. if the code can crash the Vulnerability_target, answer `9`.
 4. If the code can execute arbitrary code on the Vulnerability_target, answer `9`.
 5. If the code can gain higher privileges on the Vulnerability_target, answer `9`.
 6. If the code can cause a system to become unresponsive or crash, answer `9`.
 7. If the code can expose sensitive information to unauthorized users, answer `9`.
 8. If the code can manipulate web applications to perform malicious actions, answer `9`.
 9. If the code contains long and human unreadable strings, special characters, or encoded payloads for obfuscation, answer `9`.
 10. If the code contains hacker log print (contain "[+]/[-]/[*]/[!]"), answer `5+`.
 11. If the code contains Network Request, and the target crashed or the code is executable or get higher privileges or get shell, answer `9`.
 12. If the code contains pwn part, answer `9`.
 13. If the code contains crypto part, give a high score.
 14. If Poc/Exp mentioned in Before or After or SectionNames, give a high score.
 <|im_end|>
 <|im_start|> system
 output format:
 Provide a json string with the key "score" and the value as the confidence score from 0 to 9,wrapped in delimiters, without any other content.
 example: 
 ###OUTPUT_START###
 {{"score": 4}}
 ###OUTPUT_END###
 <|im_start|> user
 Example Input:
 Prompt for Analyzing Vulnerability-related Blog Posts:

 Summary of the Blog Post:
 This article discusses a newly discovered vulnerability in Citrix Virtual Apps and Desktops, focusing on the "Session Recording" feature. The feature, designed to monitor and record user activity for compliance and troubleshooting, is found to have a critical flaw due to the use of insecure deserialization via the BinaryFormatter in .NET. The vulnerability arises from overly permissive access controls on the Microsoft Message Queuing (MSMQ) service, which allows unauthenticated users to send messages that can be deserialized and executed. The authors detail the architecture and communication mechanisms of the Session Recording feature, highlighting how the BinaryFormatter's known security issues, combined with the MSMQ configuration, create an exploitable condition. They also explore potential exploitation vectors, including leveraging existing gadgets and exploiting HTTP-to-MSMQ message queuing, to achieve remote code execution.

 Sections of the Blog Containing the Code: None

 ## Explanation of the Packet Structure
 Tags and summary of the code snippet by another code analyser:
 1-code_language: Http Request
 2-contains_full_code: Partial
 3-contains_code_part: ["Network Request", "Constructed Input"]
 4-code_action_features: ["Network Request"]
 5-code_input: [None]
 6-code_hacker_log_print: No
 7-malformed_format: No
 8-Consecutive_nonsense_characters: No
 9-target_crashed: No
 10-Executable_code: No
 11-Vulnerability_type: ["Remote Code Execution"]
 12-Vulnerability_trigger: ["Insecure Deserialization"]
 13-Vulnerability_target_code_language: [".NET"]
 14-PocExp_mentioned_in_Abstract: Yes
 15-PocExp_mentioned_in_Before: No
 16-PocExp_mentioned_in_After: No
 17-PocExp_mentioned_in_SectionNames: No
 18-CodeSummary: "Constructs and sends a multipart HTTP POST request to a Microsoft Message Queuing (MSMQ) server endpoint, which includes a SOAP envelope for routing and message metadata, and an associated binary MSMQ message payload. This setup is intended for pushing a message with specified priority and body type into an MSMQ queue named 'queue_name' on 'example.com', using SOAP actions for message handling."
 
 Code Snippet for Analysis:
 ```
 +--------------------------------------------------+
 |              HTTP Request Line                   |
 | POST /msmq/queue_name HTTP/1.1                   |
 +--------------------------------------------------+
 | Host: example.com                                |
 | Content-Type: multipart/related;                 |
 |   boundary="MSMQ_SOAP_boundary_12345";           |
 |   type="text/xml"                                |
 | Content-Length: 2100                             |
 | SOAPAction: "MSMQMessage"                        |
 | Proxy-Accept: NonInteractiveClient               |
 +--------------------------------------------------+

 --MSMQ_SOAP_boundary_12345
 +----------------------------------------------------+
 | Content-Type: text/xml; charset=UTF-8              |
 | Content-Length: 800                                |
 +----------------------------------------------------+
 |                 SOAP Envelope                      |
 | <SOAP-ENV:Envelope xmlns:SOAP-ENV="http://...      |
 |   <SOAP-ENV:Header>                                |
 |     <m:Routing xmlns:m="<http://schemas>...">      |
 |       <Action>SendMessage</Action>                 |
 |       <To><http://example.com/msmq/queue_name></To>|
 |       <MessageID>uuid:123456789</MessageID>        |
 |     </m:Routing>                                   |
 |     <m:Properties>                                 |
 |       <ExpiresAt>20250101T123000</ExpiresAt>       |
 |       <SentAt>20241107T100000</SentAt>             |
 ...
 |   </SOAP-ENV:Body>                                 |
 | </SOAP-ENV:Envelope>                               |
 --MSMQ_SOAP_boundary_12345

 --MSMQ_SOAP_boundary_12345
 +--------------------------------------------------+
 | Content-Type: application/octet-stream           |
 | Content-Length: 1300                             |
 | Content-Id: uuid:123456789                       |
 +--------------------------------------------------+
 |               MSMQ Message                       |
 |         [Binary data or serialized object]       |
 +--------------------------------------------------+
 --MSMQ_SOAP_boundary_12345--

 ```
 Text Before the Code Snippet:
 These elements work together to create a structured message that MSMQ
 can route, prioritize, and deliver across networks. By understanding
 this layout, we could assemble a complete packet that the MSMQ server
 would accept.

 Text After the Code Snippet:
 </figure>

 Example Output:
 ###OUTPUT_START###
 {{"score": 9}}
 ###OUTPUT_END###

 <|im_start|> user
 Prompt for Analyzing Vulnerability-related Blog Posts:
 Tags and summary of the code snippet by another code analyser:
 {chain1_result}

 Summary of the Blog Post:
 {abstract}

 Sections of the Blog Containing the Code: 
 {sections}

 Code Snippet for Analysis:
 {code}

 Text Before the Code Snippet:
 {before}

 Text After the Code Snippet:
 {after}

 """
 # Closing part of the stage-2 prompt, appended verbatim after the head has
 # been shortened. It opens the assistant turn and pre-fills the
 # ###OUTPUT_START### delimiter that the caller later strips from the reply.
 prompt_poc_chain2_tail = """
 <|im_end|>
 <|im_start|> assistant
 ###OUTPUT_START###
 """


 # debug(f"prompt token num: {get_token_num(prompt_poc_chain1)+get_token_num(prompt_poc_chain1_tail)}")

 def _append_chain_debug(html_file, label, prompt, response):
    """Append one prompt/response pair to the chain debug log (best effort).

    A failure to write the log is reported through ``error`` and otherwise
    ignored, so that a missing ``./tmp`` directory cannot consume the
    caller's LLM retries.
    """
    try:
        with open("./tmp/poc_chain1.txt", "a", encoding="utf-8") as f:
            f.write("#"*50+"\n")
            f.write(f"{html_file}\n")
            f.write(f"Prompt_{label}:\n{prompt}\n\nResponse_{label}:\n{response}\n")
            f.write("\n")
    except OSError as e:
        error(f"Error occurred: {e}")


 def check_poc_or_exp_chain(html_file):
    """Decide whether an article contains PoC/Exp code using a two-stage chain.

    For every snippet returned by ``htmltool.html2codes``:

    1. Stage 1 asks ``llm_api_mistral`` (``prompt_poc_chain1_template``) for
       tags describing the snippet; the output markers are stripped from the
       reply. After three failed attempts the tags are left empty.
    2. Stage 2 (``prompt_poc_chain2_template``) receives those tags plus the
       snippet context and must answer ``{"score": <int>}``. After three
       failed attempts the snippet scores 0.

    Args:
        html_file: Article handle forwarded unchanged to ``clean_html_file``,
            ``article_abstract`` and ``htmltool.html2codes``.

    Returns:
        ``"TRUE"`` if any snippet scores at least 7, otherwise ``"FALSE"``
        (including when no snippets are extracted).
    """
    # The cleaned content is not used below; the call is kept in case the
    # helper has side effects. NOTE(review): drop it if it is pure.
    clean_html_file(html_file)
    abstract = article_abstract(html_file)
    codes = htmltool.html2codes(html_file)

    max_retries = 3
    scores = []
    for code in codes:
        snippet_fields = {
            "abstract": abstract,
            "sections": code.section,
            "code": code.code,
            "before": code.before,
            "after": code.after,
        }

        # ---- Stage 1: tag the snippet -------------------------------------
        prompt_chain1 = shorten_prompt(
            prompt_poc_chain1_template.format(**snippet_fields),
            7980 - get_token_num(prompt_poc_chain1_tail),
        ) + prompt_poc_chain1_tail

        chain1_result = ""  # used as-is when every attempt fails
        for _ in range(max_retries):
            try:
                response1 = llm_api_mistral(prompt_chain1)
                if DEBUG:
                    _append_chain_debug(
                        html_file, "chain1", prompt_chain1, response1)
                # A None response raises here and counts as a failed attempt.
                chain1_result = (
                    response1.replace("###OUTPUT###", "")
                    .replace("###YAML_START###", "")
                    .replace("###YAML_END###", "")
                )
                break
            except Exception as e:
                error(f"Error occurred: {e}")

        # ---- Stage 2: score the snippet using the stage-1 tags ------------
        prompt_chain2 = shorten_prompt(
            prompt_poc_chain2_template.format(
                chain1_result=chain1_result, **snippet_fields),
            7980 - get_token_num(prompt_poc_chain2_tail),
        ) + prompt_poc_chain2_tail

        score = 0  # used as-is when every attempt fails
        for _ in range(max_retries):
            try:
                response2 = llm_api_mistral(prompt_chain2)
                if DEBUG:
                    _append_chain_debug(
                        html_file, "chain2", prompt_chain2, response2)
                jstr = (
                    response2.replace("###OUTPUT_START###", "")
                    .replace("###OUTPUT_END###", "")
                    .replace("\n", "")
                )
                score = int(json.loads(jstr)["score"])
                break
            except Exception as e:
                error(f"Error occurred: {e}")

        scores.append(score)

    debug(f"""check_poc_or_exp_chain: html_file: {html_file}\t Scores: {scores}""")
    if any(s >= 7 for s in scores):
        return "TRUE"
    return "FALSE"

	# Single-pass PoC/Exp scoring prompt (confidence 1-5).
	# check_poc_or_exp() fills {abstract}, {sections}, {code}, {before} and
	# {after} through str.format(), so the text must not contain any other
	# literal braces.
	# NOTE(review): the "\|" sequences look like Markdown escaping that ended
	# up inside the literal. "\|" is not a recognised Python escape, so the
	# backslashes stay in the prompt text - confirm against the real file.
	# NOTE(review): the "Definition" system turn is never closed with an
	# im_end marker before the next user turn - confirm that is intentional.
	prompt_poc = """
	<\|im_start\|> system
	You are an advanced vulnerability mining tool, which is specially used to extract key knowledge from vulnerability analysis text.
	<\|im_end\|>
	<\|im_start\|> user
	Task: Your task is to analyze the vulnerability analysis text provided and determine whether there is partial or complete code of PoC or EXP in the text, including executable script code, constructed input function, network request, and shell code.
	1. Determine whether the code segment aligns with the characteristics of a Vulnerability Proof of Concept (PoC) or a Vulnerability Exploitation (Exp) example.
	2. Provide a confidence score from 1 to 5 (where 5 signifies high confidence) on your determination.
	<\|im_end\|>
	<\|im_start\|> system
	Definition:
	A Proof of Concept (PoC) is a demonstration of a vulnerability or exploit that shows how a system can be compromised. It is used to verify the existence of a vulnerability and demonstrate its impact.
	A Vulnerability Exploitation (Exp) is the act of taking advantage of a vulnerability to compromise a system or gain unauthorized access.
	types and features of PoC/Exp:
	1. remote code execution (RCE): allows an attacker to execute arbitrary code on a target system, usually python, bash, or shell code.
	2. local privilege escalation (LPE): allows an attacker to gain higher privileges on a target system.
	3. denial of service (DoS): causes a system to become unresponsive or crash.
	4. information disclosure: exposes sensitive information to unauthorized users.
	5. CSRF, XSS, SQLi, SSRF, XXE, and other web vulnerabilities: allows an attacker to manipulate web applications to perform malicious actions.
	6. usually use long and human unreadable strings, special characters, or encoded payloads for obfuscation.

	<\|im_start\|> user
	Prompt for Analyzing Vulnerability-related Blog Posts:

	Summary of the Blog Post:
	{abstract}

	Sections of the Blog Containing the Code:
	{sections}

	Code Snippet for Analysis:
	{code}

	Text Before the Code Snippet:
	{before}

	Text After the Code Snippet:
	{after}

	"""
	# Closing part of the single-pass prompt. check_poc_or_exp() appends it
	# verbatim (it is NOT passed through str.format(), which is why the
	# single braces in {"score": 4} are safe here).
	# NOTE(review): the second system turn contains only the example answer
	# {"score": 4}; it looks like a leftover few-shot answer - confirm.
	prompt_poc_tail = """
	<\|im_end\|>
	<\|im_start\|> system
	output format:
	Provide a json string with the key "score" and the value as the confidence score from 1 to 5, without any other content.
	example: {"score": 4}
	<\|im_end\|>
	<\|im_start\|> system
	{"score": 4}
	<\|im_end\|>
	<\|im_start\|> assistant
	"""

	import json

	def check_poc_or_exp(html_file):
	    """Decide whether an article contains PoC/Exp code, snippet by snippet.

	    Each snippet returned by ``htmltool.html2codes`` is submitted to
	    ``llm_api_mistral`` three times using ``prompt_poc`` (1-5 scale). The
	    attempts whose answer parses as ``{"score": <int>}`` are averaged; a
	    snippet with no parseable answer scores 0.

	    Args:
	        html_file: Article handle forwarded unchanged to
	            ``clean_html_file``, ``article_abstract`` and
	            ``htmltool.html2codes``.

	    Returns:
	        ``"TRUE"`` if any snippet's average score is at least 4.5,
	        otherwise ``"FALSE"``. When no snippets are extracted, the result
	        of ``check_poc_or_exp_old(html_file)`` is returned instead.
	    """
	    # The cleaned content is not used below; the call is kept in case the
	    # helper has side effects. NOTE(review): drop it if it is pure.
	    clean_html_file(html_file)
	    abstract = article_abstract(html_file)
	    codes = htmltool.html2codes(html_file)

	    # Graceful degradation: without extracted snippets, use the old checker.
	    if not codes:
	        return check_poc_or_exp_old(html_file)

	    scores = []
	    for code in codes:
	        prompt_head = prompt_poc.format(
	            abstract=abstract,
	            sections=code.section,
	            code=code.code,
	            before=code.before,
	            after=code.after)
	        # The prompt is identical for every attempt, so build it once.
	        prompt = shorten_prompt(prompt_head, 7900) + prompt_poc_tail

	        attempt_scores = []
	        for _ in range(3):
	            response = llm_api_mistral(prompt)
	            time.sleep(FORCE_SLEEP)
	            if response is None:
	                continue
	            try:
	                debug(f"prompt: {prompt} \n response: {response}")
	                attempt_scores.append(int(json.loads(response)["score"]))
	            except Exception as e:
	                # Best effort: an unparseable answer just drops this attempt.
	                error(f"Error occurred: {e}")

	        if attempt_scores:
	            scores.append(sum(attempt_scores) / len(attempt_scores))
	            # Computed outside the f-string: a backslash inside an f-string
	            # expression is a SyntaxError before Python 3.12.
	            snippet = code.code[:100].replace("\n", " ")
	            debug(f"""## check_poc_or_exp:
	html_file: {html_file}
	code: {snippet}
	score: {scores[-1]}""")
	        else:
	            scores.append(0)

	    if DEBUG:
	        debug(f"""## check_poc_or_exp: scores: {scores}""")
	        for code, code_score in zip(codes, scores):
	            snippet = code.code[:200].replace("\n", " ")
	            debug(f"""## score: {code_score}\t code: {snippet}""")

	    # Gate on the whole score list. The previous `if score:` read a variable
	    # that only exists when DEBUG is on (and then held just the last score).
	    if any(s >= 4.5 for s in scores):
	        return "TRUE"
	    return "FALSE"

	# Initial classification of PoC/Exp (stage-1 tagging prompt).
	# The placeholders {abstract}, {sections}, {code}, {before} and {after}
	# are str.format() fields, so the text must not contain any other literal
	# braces. The model is asked for a YAML block wrapped in
	# ###YAML_START### / ###YAML_END###.
	# NOTE(review): the "\|" sequences look like Markdown escaping that ended
	# up inside the literal. "\|" is not a recognised Python escape, so the
	# backslashes stay in the prompt text - confirm against the real file.
	# NOTE(review): item "10 Executable code" lacks the "." the other items
	# have, and the examples answer item 13 with a list although it is
	# declared single-choice - confirm whether either matters for the model.
	prompt_poc_chain1_template = """
	<\|im_start\|> system
	You are an advanced vulnerability mining tool, which is specially used to extract key knowledge from vulnerability analysis text.
	<\|im_end\|>
	<\|im_start\|> user
	Task: Your task is to analyze the code snippet in vulnerability analysis text provided and category the code from diverse perspectives with tags. The code snippet contains article abstracts, code snippet, the before and after text of the code, and the sections of the blog containing the code.
	different perspective:
	1. code language (single-choice): None, Log, Http Request, Assembly, Pseudocode , Bash, Python, Java, PHP, JavaScript, C, C++, Powershell, etc.
	2. contains full code (single-choice): Full, Partial, None
	3. contains code part (multi-choice): None, Payload, Shellcode, Constructed Input, Network Request, Function Definition, Struct Definition, Variable Definition, etc.
	4. code action features (multi-choice): None, Execute Command, Network Request, Memory Operation, File Operation, Registry Operation, Pwn, Crypto, Reversing, etc.
	5. code input (multi-choice): None, address, port, username, password, command, file path, registry key, shell code,payload, etc.
	6. code hacker log print (contain "[+]/[-]/[*]/[!]") (single-choice) : Yes, No
	7. malformed format (single-choice): Yes, No
	8. Consecutive nonsense characters (single-choice): Yes, No
	9. target crashed (single-choice): Yes, No
	10 Executable code (single-choice): Yes, No
	11. Vulnerability type (multi-choice): None, Remote Code Execution, Local Privilege Escalation, Denial of Service, Information Disclosure, CSRF, XSS, SQLi, SSRF, XXE, etc.
	12. Vulnerability trigger (multi-choice): None, Buffer Overflow, Integer Overflow, SQL Injection, Command Injection, Path Traversal, Unrestricted File Upload, etc.
	13. Vulnerability target's code language (single-choice): None, Bash, Python, Java, PHP, JavaScript, C, C++, Powershell, etc.
	14. Poc/Exp mentioned in Abstract (single-choice): Yes, No
	15. Poc/Exp mentioned in Before (single-choice): Yes, No
	16. Poc/Exp mentioned in After (single-choice): Yes, No
	17. Poc/Exp mentioned in Section Names (single-choice): Yes, No
	18. Code Summary (text): "a brief summary of code, including the content, target, impact of the code."
	<\|im_end\|>

	<\|im_start\|> system
	output format:
	Provide a YAML object contains key-value pairs, values are lists or str,without any other content.
	example:

	###YAML_START###
	1-code_language: JavaScript
	2-contains_full_code: Partial
	3-contains_code_part: [ "Constructed Input"]
	4-code_action_features: ["Network Request", "Memory Operation"]
	5-code_input: ["address", "port"]
	6-code_hacker_log_print: No
	7-malformed_format: Yes
	8-Consecutive_nonsense_characters: Yes
	9-target_crashed: Yes
	10-Executable_code: Yes
	11-Vulnerability_type: ["Use After Free"]
	12-Vulnerability_trigger: ["Malformed XML Data"]
	13-Vulnerability_target_code_language: ["C"]
	14-PocExp_mentioned_in_Abstract: Yes
	15-PocExp_mentioned_in_Before: Yes
	16-PocExp_mentioned_in_After: Yes
	17-PocExp_mentioned_in_SectionNames: No
	18-CodeSummary: "contain a malformed xml, send to Adobe Acrobat DC, raise use-after-free vulnerability."
	###YAML_END###

	<\|im_end\|>

	<\|im_start\|> user
	Example Input:
	Prompt for Analyzing Vulnerability-related Blog Posts:

	Summary of the Blog Post:
	This article discusses a vulnerability, identified as CVE-2020-3800, in Adobe Acrobat DC version 2019.008.20064 on Windows 10 64-bit. The vulnerability is triggered by executing specific JavaScript code that causes a use-after-free condition due to malformed XML data. Although the crash can be observed, the impact of this vulnerability is considered low because attempts to reallocate the freed object were unsuccessful. The vendor acknowledged the issue and released a patch to address it.

	Sections of the Blog Containing the Code:
	# Description of the vulnerability\#

	Code Snippet for Analysis:
	```
	xfa.loadXML('<document>\\na\\na\\na\\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
	'\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
	'\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
	'\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na\\na'+
	'\\na\\na\\na\\na\\na\n<![CDATA[\\n</document>')
	```

	Text Before the Code Snippet:
	::: highlightAdobe Reader and Adobe Acrobat DC crashes after executing the following
	Javascript code:Both Adobe Reader and Acrobat DC share the same `AcroForm.api` plugin:
	File Version 19.012.20040.17853

	Text After the Code Snippet:
	:::The malformed XML data causes a use-after-free to occur. While there are
	there are many indirect calls following this that can be used to control
	EIP, many combinations of XML data and Javascript code was tested but
	was unable to re-allocated back into the freed object hence the impact
	of this vulnerability might be low.

	Example Output:
	###OUTPUT###
	###YAML_START###
	1-code_language: JavaScript
	2-contains_full_code: Partial
	3-contains_code_part: [ "Constructed Input"]
	4-code_action_features: [ "Memory Operation"]
	5-code_input: [None]
	6-code_hacker_log_print: No
	7-malformed_format: Yes
	8-Consecutive_nonsense_characters: Yes
	9-target_crashed: Yes
	10-Executable_code: Yes
	11-Vulnerability_type: ["Use After Free"]
	12-Vulnerability_trigger: ["Malformed XML Data"]
	13-Vulnerability_target_code_language: ["C"]
	14-PocExp_mentioned_in_Abstract: Yes
	15-PocExp_mentioned_in_Before: Yes
	16-PocExp_mentioned_in_After: Yes
	17-PocExp_mentioned_in_SectionNames: No
	18-CodeSummary: "contain a malformed xml, send to Adobe Acrobat DC, raise use-after-free vulnerability."
	###YAML_END###
	<\|im_end\|>
	<\|im_start\|> user
	Prompt for Analyzing Vulnerability-related Blog Posts:

	Summary of the Blog Post:
	{abstract}

	Sections of the Blog Containing the Code:
	{sections}

	Code Snippet for Analysis:
	```
	{code}
	```
	Text Before the Code Snippet:
	{before}

	Text After the Code Snippet:
	{after}

	"""
	# Closing part of the stage-1 prompt: it opens the assistant turn and
	# pre-fills the ###OUTPUT### marker used in the example output.
	prompt_poc_chain1_tail = """
	<\|im_end\|>

	<\|im_start\|> assistant
	###OUTPUT###

	"""

	# Chain-2 (scoring) prompt template, filled with str.format().
	# Placeholders: {chain1_result}, {abstract}, {sections}, {code}, {before},
	# {after}. Literal braces in the JSON examples are doubled ("{{" / "}}") so
	# that str.format() emits them as single braces.
	# The model is asked to answer with a JSON object {"score": 0-9} wrapped in
	# ###OUTPUT_START### / ###OUTPUT_END### delimiters.
	# NOTE(review): the "output format" system turn and the "Example Input" user
	# turn are each followed by a new turn-start marker without an end-of-turn
	# marker in between -- confirm this is intentional.
	prompt_poc_chain2_template = """
	<\|im_start\|> system
	You are an advanced vulnerability mining tool, which is specially used to extract key knowledge from vulnerability analysis text.
	<\|im_end\|>
	<\|im_start\|> user
	Task: Your task is to analyze the code snippet in vulnerability analysis text provided and determine whether there is partial or complete code of PoC or EXP in the text, including executable script code, constructed input function, network request, and shell code.
	1. Determine whether the code segment aligns with the characteristics of a Vulnerability Proof of Concept (PoC) or a Vulnerability Exploitation (Exp) example.
	2. Provide a confidence score from 0 to 9 (where 9 signifies high confidence) on your determination.
	<\|im_end\|>
	<\|im_start\|> system
	Definition:
	A Proof of Concept (PoC) is a demonstration of a vulnerability or exploit that shows how a system can be compromised. It is used to verify the existence of a vulnerability and demonstrate its impact.
	A Vulnerability Exploitation (Exp) is the act of taking advantage of a vulnerability to compromise a system or gain unauthorized access.
	Rules for determining whether a code snippet is PoC/Exp or No:
	1. If the code is decompiled pseudo code or assembly code from Vulnerability_target, answer `0`.
	2. If the code is from a part of Vulnerability_target and with same code language, answer `0`.
	3. if the code can crash the Vulnerability_target, answer `9`.
	4. If the code can execute arbitrary code on the Vulnerability_target, answer `9`.
	5. If the code can gain higher privileges on the Vulnerability_target, answer `9`.
	6. If the code can cause a system to become unresponsive or crash, answer `9`.
	7. If the code can expose sensitive information to unauthorized users, answer `9`.
	8. If the code can manipulate web applications to perform malicious actions, answer `9`.
	9. If the code contains long and human unreadable strings, special characters, or encoded payloads for obfuscation, answer `9`.
	10. If the code contains hacker log print (contain "[+]/[-]/[*]/[!]"), answer `5+`.
	11. If the code contains Network Request, and the target crashed or the code is executable or get higher privileges or get shell, answer `9`.
	12. If the code contains pwn part, answer `9`.
	13. If the code contains crypto part, give a high score.
	14. If Poc/Exp mentioned in Before or After or SectionNames, give a high score.
	<\|im_end\|>
	<\|im_start\|> system
	output format:
	Provide a json string with the key "score" and the value as the confidence score from 0 to 9,wrapped in delimiters, without any other content.
	example:
	###OUTPUT_START###
	{{"score": 4}}
	###OUTPUT_END###
	<\|im_start\|> user
	Example Input:
	Prompt for Analyzing Vulnerability-related Blog Posts:

	Summary of the Blog Post:
	This article discusses a newly discovered vulnerability in Citrix Virtual Apps and Desktops, focusing on the "Session Recording" feature. The feature, designed to monitor and record user activity for compliance and troubleshooting, is found to have a critical flaw due to the use of insecure deserialization via the BinaryFormatter in .NET. The vulnerability arises from overly permissive access controls on the Microsoft Message Queuing (MSMQ) service, which allows unauthenticated users to send messages that can be deserialized and executed. The authors detail the architecture and communication mechanisms of the Session Recording feature, highlighting how the BinaryFormatter's known security issues, combined with the MSMQ configuration, create an exploitable condition. They also explore potential exploitation vectors, including leveraging existing gadgets and exploiting HTTP-to-MSMQ message queuing, to achieve remote code execution.

	Sections of the Blog Containing the Code: None

	## Explanation of the Packet Structure
	Tags and summary of the code snippet by another code analyser:
	1-code_language: Http Request
	2-contains_full_code: Partial
	3-contains_code_part: ["Network Request", "Constructed Input"]
	4-code_action_features: ["Network Request"]
	5-code_input: [None]
	6-code_hacker_log_print: No
	7-malformed_format: No
	8-Consecutive_nonsense_characters: No
	9-target_crashed: No
	10-Executable_code: No
	11-Vulnerability_type: ["Remote Code Execution"]
	12-Vulnerability_trigger: ["Insecure Deserialization"]
	13-Vulnerability_target_code_language: [".NET"]
	14-PocExp_mentioned_in_Abstract: Yes
	15-PocExp_mentioned_in_Before: No
	16-PocExp_mentioned_in_After: No
	17-PocExp_mentioned_in_SectionNames: No
	18-CodeSummary: "Constructs and sends a multipart HTTP POST request to a Microsoft Message Queuing (MSMQ) server endpoint, which includes a SOAP envelope for routing and message metadata, and an associated binary MSMQ message payload. This setup is intended for pushing a message with specified priority and body type into an MSMQ queue named 'queue_name' on 'example.com', using SOAP actions for message handling."

	Code Snippet for Analysis:
	```
	+--------------------------------------------------+
	\| HTTP Request Line \|
	\| POST /msmq/queue_name HTTP/1.1 \|
	+--------------------------------------------------+
	\| Host: example.com \|
	\| Content-Type: multipart/related; \|
	\| boundary="MSMQ_SOAP_boundary_12345"; \|
	\| type="text/xml" \|
	\| Content-Length: 2100 \|
	\| SOAPAction: "MSMQMessage" \|
	\| Proxy-Accept: NonInteractiveClient \|
	+--------------------------------------------------+

	--MSMQ_SOAP_boundary_12345
	+----------------------------------------------------+
	\| Content-Type: text/xml; charset=UTF-8 \|
	\| Content-Length: 800 \|
	+----------------------------------------------------+
	\| SOAP Envelope \|
	\| <SOAP-ENV:Envelope xmlns:SOAP-ENV="http://... \|
	\| <SOAP-ENV:Header> \|
	\| <m:Routing xmlns:m="<http://schemas>..."> \|
	\| <Action>SendMessage</Action> \|
	\| <To><http://example.com/msmq/queue_name></To>\|
	\| <MessageID>uuid:123456789</MessageID> \|
	\| </m:Routing> \|
	\| <m:Properties> \|
	\| <ExpiresAt>20250101T123000</ExpiresAt> \|
	\| <SentAt>20241107T100000</SentAt> \|
	...
	\| </SOAP-ENV:Body> \|
	\| </SOAP-ENV:Envelope> \|
	--MSMQ_SOAP_boundary_12345

	--MSMQ_SOAP_boundary_12345
	+--------------------------------------------------+
	\| Content-Type: application/octet-stream \|
	\| Content-Length: 1300 \|
	\| Content-Id: uuid:123456789 \|
	+--------------------------------------------------+
	\| MSMQ Message \|
	\| [Binary data or serialized object] \|
	+--------------------------------------------------+
	--MSMQ_SOAP_boundary_12345--

	```
	Text Before the Code Snippet:
	These elements work together to create a structured message that MSMQ
	can route, prioritize, and deliver across networks. By understanding
	this layout, we could assemble a complete packet that the MSMQ server
	would accept.

	Text After the Code Snippet:
	</figure>

	Example Output:
	###OUTPUT_START###
	{{"score": 9}}
	###OUTPUT_END###

	<\|im_start\|> user
	Prompt for Analyzing Vulnerability-related Blog Posts:
	Tags and summary of the code snippet by another code analyser:
	{chain1_result}

	Summary of the Blog Post:
	{abstract}

	Sections of the Blog Containing the Code:
	{sections}

	Code Snippet for Analysis:
	{code}

	Text Before the Code Snippet:
	{before}

	Text After the Code Snippet:
	{after}

	"""
	# Suffix appended to the chain-2 prompt after the filled template has been
	# passed through shorten_prompt(): it ends the user turn and opens the
	# assistant turn, pre-seeded with the ###OUTPUT_START### delimiter that the
	# caller later strips from the response.
	prompt_poc_chain2_tail = """
	<\|im_end\|>
	<\|im_start\|> assistant
	###OUTPUT_START###
	"""


	# debug(f"prompt token num: {get_token_num(prompt_poc_chain1)+get_token_num(prompt_poc_chain1_tail)}")

	# Overall budget per prompt: shorten_prompt() receives this value minus the
	# tail's get_token_num(), and the tail is appended afterwards.
	_POC_PROMPT_TOKEN_BUDGET = 7980
	# A single snippet scoring at least this much makes the whole file "TRUE".
	_POC_SCORE_THRESHOLD = 7
	# Number of LLM attempts per chain before falling back to a default value.
	_POC_MAX_RETRIES = 3


	def _build_poc_prompt(template, tail, **fields):
	    """Fill *template* with *fields*, shorten it to leave room for *tail*, then append *tail*."""
	    head = template.format(**fields)
	    return shorten_prompt(head, _POC_PROMPT_TOKEN_BUDGET - get_token_num(tail)) + tail


	def _strip_chain1_markers(response):
	    """Return the chain-1 response with its ###OUTPUT### / YAML delimiters removed."""
	    return response.replace("###OUTPUT###", "") \
	        .replace("###YAML_START###", "") \
	        .replace("###YAML_END###", "")


	def _parse_chain2_score(response):
	    """Extract the integer ``score`` from a chain-2 response.

	    The delimiters and newlines are removed, then the first JSON object in
	    the remaining text is decoded. Using ``raw_decode`` means text that the
	    model emits after the JSON object no longer makes parsing fail.

	    Raises:
	        ValueError, KeyError, TypeError: when no usable ``{"score": ...}``
	            object is present (the caller treats any exception as a failed
	            attempt).
	    """
	    text = response.replace("###OUTPUT_START###", "") \
	        .replace("###OUTPUT_END###", "") \
	        .replace("\n", "")
	    start = text.find("{")
	    if start == -1:
	        # No object at all: let json.loads raise (or yield a non-dict that
	        # fails on the subscript below), exactly as a strict parse would.
	        payload = json.loads(text)
	    else:
	        payload, _ = json.JSONDecoder().raw_decode(text, start)
	    return int(payload["score"])


	def _ask_llm_with_retries(prompt, parse, fallback, label, html_file):
	    """Send *prompt* to the LLM and return ``parse(response)``.

	    Any exception raised by the call, the debug logging, or *parse* is
	    reported through ``error()`` and counts as one failed attempt. After
	    ``_POC_MAX_RETRIES`` failed attempts *fallback* is returned.

	    *label* ("chain1" / "chain2") only affects the headings written to the
	    debug log when ``DEBUG`` is set.
	    """
	    for _ in range(_POC_MAX_RETRIES):
	        try:
	            response = llm_api_mistral(prompt)

	            if DEBUG:
	                # Both chains append to the same log file.
	                with open("./tmp/poc_chain1.txt", "a", encoding="utf-8") as f:
	                    f.write("#"*50+"\n")
	                    f.write(f"{html_file}\n")
	                    f.write(f"Prompt_{label}:\n{prompt}\n\nResponse_{label}:\n{response}\n")
	                    f.write("\n")

	            return parse(response)
	        except Exception as e:
	            error(f"Error occurred: {e}")
	    return fallback


	def check_poc_or_exp_chain(html_file):
	    """Decide whether any code snippet in *html_file* looks like a PoC/Exp.

	    For every snippet returned by ``htmltool.html2codes(html_file)`` two LLM
	    prompts are run in sequence:

	    1. chain 1 asks for tags and a summary of the snippet; if all attempts
	       fail, an empty string is used instead;
	    2. chain 2 receives that result plus the same context and must answer
	       with a JSON score; if all attempts fail, the score is 0.

	    Args:
	        html_file: passed unchanged to ``clean_html_file``,
	            ``article_abstract`` and ``htmltool.html2codes`` (presumably a
	            path to an HTML file -- confirm against those helpers).

	    Returns:
	        The string ``"TRUE"`` when at least one snippet scores
	        ``_POC_SCORE_THRESHOLD`` or higher, otherwise ``"FALSE"`` (also when
	        there are no snippets).
	    """
	    # The cleaned content is not used here; the call is retained in case
	    # clean_html_file has side effects or validates the input -- TODO confirm.
	    clean_html_file(html_file)
	    abstract = article_abstract(html_file)
	    codes = htmltool.html2codes(html_file)

	    scores = []
	    for code in codes:
	        # Context shared by both prompt templates.
	        context = dict(
	            abstract=abstract,
	            sections=code.section,
	            code=code.code,
	            before=code.before,
	            after=code.after,
	        )

	        prompt_chain1 = _build_poc_prompt(
	            prompt_poc_chain1_template, prompt_poc_chain1_tail, **context
	        )
	        chain1_result = _ask_llm_with_retries(
	            prompt_chain1, _strip_chain1_markers, "", "chain1", html_file
	        )

	        prompt_chain2 = _build_poc_prompt(
	            prompt_poc_chain2_template,
	            prompt_poc_chain2_tail,
	            chain1_result=chain1_result,
	            **context,
	        )
	        score = _ask_llm_with_retries(
	            prompt_chain2, _parse_chain2_score, 0, "chain2", html_file
	        )

	        scores.append(score)

	    debug(f"""check_poc_or_exp_chain: html_file: {html_file}\t Scores: {scores}""")
	    if any(s >= _POC_SCORE_THRESHOLD for s in scores):
	        return "TRUE"
	    return "FALSE"