Created
February 21, 2025 19:18
-
-
Save Tiberriver256/263fcb84fe20edc12b379a63ccafd73b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# /// script
# dependencies = [
#   "crawl4ai",
#   "asyncio"
# ]
# ///
import asyncio
import os
from pathlib import Path

from crawl4ai import *
async def main(
    url: str = "<insert-your-url-here>",
    output_path: str = "markdown/content.md",
) -> None:
    """Crawl a page plus its internal links and save them as one Markdown file.

    The page at ``url`` is fetched first. Each distinct internal link reported
    for that page is then fetched in turn, one level deep: links found on the
    subpages are not followed. Every page becomes a section made of a
    ``# <title>`` / ``URL:`` header followed by its Markdown; the sections are
    joined with ``---`` rules and written to ``output_path``.

    Progress and errors are printed to stdout. A subpage that cannot be
    crawled is reported and skipped; if the start page itself fails, nothing
    is written.

    Args:
        url: Address of the start page. The default is a placeholder that
            must be replaced with a real URL.
        output_path: File that receives the combined Markdown. Missing parent
            directories are created.
    """

    def format_page(page) -> str:
        """Render one crawl result as a titled Markdown section."""
        # NOTE(review): metadata looks optional on crawl results, so guard
        # against None instead of crashing on `.get` -- confirm against
        # crawl4ai's CrawlResult.
        title = (page.metadata or {}).get("title")
        return f"# {title}\nURL: {page.url}\n\n{page.markdown}\n"

    # Markdown sections, in crawl order.
    all_markdown = []

    async with AsyncWebCrawler() as crawler:
        # Crawl the main page first; its link list drives everything else.
        result = await crawler.arun(
            url=url,
        )
        if not result.success:
            print(f"Error crawling {url}: {result.error_message}")
            return

        all_markdown.append(format_page(result))
        print("Main page processed...")

        # Track processed URLs so a page linked several times is fetched once.
        processed_urls = {result.url}

        for link in result.links.get("internal", []):
            href = link.get("href")
            # Skip entries without a target as well as already-seen pages.
            if not href or href in processed_urls:
                continue
            processed_urls.add(href)

            # Best effort per link: one broken page must not abort the run.
            try:
                sub_result = await crawler.arun(url=href)
            except Exception as e:
                print(f"Error crawling {href}: {str(e)}")
                continue

            if not sub_result.success:
                print(f"Error crawling {href}: {sub_result.error_message}")
                continue

            all_markdown.append(format_page(sub_result))
            print(f"Processed: {href}")

    # Create the output directory if needed, then save everything to one file.
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text("\n---\n\n".join(all_markdown), encoding="utf-8")
    print(f"\nMarkdown content has been saved to {output_path}")
if __name__ == "__main__":
    # Start the crawl only when run as a script, not when imported.
    asyncio.run(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment