Skip to content

Instantly share code, notes, and snippets.

@markrittman
Created August 2, 2024 23:48

Revisions

  1. markrittman created this gist Aug 2, 2024.
    271 changes: 271 additions & 0 deletions scrape_squarespace_blogs.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,271 @@
    # IPython/Jupyter shell escape: the leading "!" hands the rest of the line to the shell.
    # This is only valid inside a notebook cell; a plain Python interpreter rejects the line.
    !pip install requests beautifulsoup4

    import sys
    import subprocess
    import time
    import random

    # Install required packages
    def install(package):
        """Install ``package`` with pip, using the interpreter that runs this script.

        Invoking ``<sys.executable> -m pip`` targets the same environment the
        script is running in.

        Args:
            package: Name of the distribution to install, as pip expects it.

        Raises:
            subprocess.CalledProcessError: If pip exits with a non-zero status.
        """
        pip_command = [sys.executable, "-m", "pip", "install", package]
        subprocess.check_call(pip_command)

    print("Installing required packages...")
    # Install the dependencies in a fixed order; urllib3 is listed explicitly
    # for better HTTPS support.
    for required_package in ('requests', 'beautifulsoup4', 'urllib3'):
        install(required_package)
    print("Packages installed successfully.")

    import requests
    from bs4 import BeautifulSoup
    import csv
    from datetime import datetime
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    # Set up a retry strategy
    def requests_retry_session(
        retries=3,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504),
        session=None,
    ):
        """Return a requests session whose HTTP and HTTPS requests use a retry policy.

        Args:
            retries: Value used for the total, read and connect retry limits.
            backoff_factor: Backoff factor passed to the ``Retry`` policy.
            status_forcelist: HTTP status codes passed to ``Retry`` as the
                statuses that should force a retry.
            session: Existing session to configure; a new ``requests.Session``
                is created when this is falsy.

        Returns:
            The session, with one retry-enabled adapter mounted for both the
            ``http://`` and ``https://`` prefixes.
        """
        http_session = session or requests.Session()

        retry_policy = Retry(
            total=retries,
            read=retries,
            connect=retries,
            backoff_factor=backoff_factor,
            status_forcelist=status_forcelist,
        )
        retrying_adapter = HTTPAdapter(max_retries=retry_policy)

        # The same adapter instance serves both URL schemes.
        for scheme_prefix in ('http://', 'https://'):
            http_session.mount(scheme_prefix, retrying_adapter)

        return http_session

    def scrape_squarespace_blog(url, session):
        """Fetch one blog post and extract its title, author, date and body text.

        Args:
            url: Address of the blog post to fetch.
            session: Object exposing a requests-style ``get(url, timeout=...)``
                method, e.g. the session built by ``requests_retry_session``.

        Returns:
            A dict with the keys ``url``, ``title``, ``author``, ``date`` and
            ``content``. Any part that cannot be located in the page is replaced
            by a "... not found" placeholder string. Returns ``None`` (after
            printing the error) when the HTTP request fails or the server
            answers with an error status.
        """
        # Only the network call can raise RequestException, so keep the try
        # block limited to it.
        try:
            response = session.get(url, timeout=30)
            response.raise_for_status()  # Raise an exception for bad status codes
            html = response.content
        except requests.exceptions.RequestException as e:
            print(f"An error occurred while scraping {url}: {e}")
            return None

        # Parse the HTML content
        soup = BeautifulSoup(html, 'html.parser')

        # Extract the title
        title_element = soup.find('h1', class_='entry-title')
        title = title_element.text.strip() if title_element else "Title not found"

        # Extract the author
        author_element = soup.find('a', class_='blog-author-name')
        author = author_element.text.strip() if author_element else "Author not found"

        # Extract the date. A matching <time> tag may lack the "datetime"
        # attribute; .get() falls back to the placeholder instead of raising
        # KeyError, which the RequestException handler would not catch.
        date_element = soup.find('time', class_='dt-published')
        if date_element is not None:
            date = date_element.get('datetime', 'Date not found')
        else:
            date = 'Date not found'

        # Extract the main content: paragraph and sub-heading text, space-joined
        content_div = soup.find('div', class_='blog-item-content')
        if content_div:
            text_tags = content_div.find_all(['p', 'h2', 'h3', 'h4', 'h5', 'h6'])
            content = ' '.join(tag.text for tag in text_tags)
        else:
            content = "Content not found"

        return {
            'url': url,
            'title': title,
            'author': author,
            'date': date,
            'content': content,
        }




    # List of URLs to scrape.
    # Each post is listed exactly once: a repeated entry would be fetched twice
    # and written to the CSV as two identical rows.
    urls = [
        'https://www.rittmananalytics.com/blog/2023/2/18/kpi-dashboards-and-balanced-scorecards-using-looker-dbt-and-google-bigquery',
        'https://www.rittmananalytics.com/blog/2023/2/19/the-dbt-semantic-layer-data-orchestration-and-the-modern-enterprise-data-stack',
        'https://www.rittmananalytics.com/blog/2023/2/19/modern-data-stack-healthcheck-service-from-rittman-analytics',
        'https://www.rittmananalytics.com/blog/2023/3/1/behavioural-analytics-querying-fast-and-slow',
        'https://www.rittmananalytics.com/blog/2019/12/18/previewing-the-new-looker-dashboard-experience-and-whats-coming-in-looker-7',
        'https://www.rittmananalytics.com/blog/2019/01/21/2019-1-21-looker-london-meetup-on-thursday-february-7th-2019-registration-now-open',
        'https://www.rittmananalytics.com/blog/2018/11/27/2018-11-27-mjr-analytics-sessions-at-ukoug-tech18-liverpool-acc-3rd-5th-december-2018',
        'https://www.rittmananalytics.com/blog/2018/11/14/2018-11-14-slides-and-forbescom-article-from-data-warehouse-like-a-tech-startup-with-oracle-autonomous-data-warehouse-cloud',
        'https://www.rittmananalytics.com/blog/2018/11/12/event-level-digital-analytics-using-google-analytics-fivetran-bigquery-andnbsplooker',
        'https://www.rittmananalytics.com/blog/2018/10/28/nlnhb0kr2emevhbm2qee7jcjot1qtx',
        'https://www.rittmananalytics.com/blog/2023/10/29/coalesce-2023-and-the-new-dbt-cloud-cli',
        'https://www.rittmananalytics.com/blog/2023/9/7/bringing-dbt-and-analytics-engineering-to-oracle-autonomous-data-warehouse',
        'https://www.rittmananalytics.com/blog/2021/5/30/extending-the-dbt-test-pipeline-to-downstream-looker-content-using-spectacles-dbtcloud-andnbspgithub',
        'https://www.rittmananalytics.com/blog/2024/7/5/ai-powered-conversational-data-analyst-chatbot',
        'https://www.rittmananalytics.com/blog/2024/6/17/automated-insights-gemini-1-5-flash',
        'https://www.rittmananalytics.com/blog/2024/2/5/generative-ai-comes-to-looker-via-vertex-ai-and-bigquery-bqml',
        'https://www.rittmananalytics.com/blog/2024/1/14/automate-your-contacts-list-segmentation-using-google-bigquery-vertex-ai-and-the-mlgeneratetext-function',
        'https://www.rittmananalytics.com/blog/2023/5/11/ra-assistant-our-gpt-35-turbo-powered-modern-data-stack-chatbot',
        'https://www.rittmananalytics.com/blog/2023/3/26/chatgpt-large-language-models-and-the-future-of-dbt-and-analytics-consulting',
        'https://www.rittmananalytics.com/blog/2016/09/27/2016-09-27-drill-to-detail-ep-2-the-future-of-sql-on-hadoop-with-special-guest-dan-mcclary',
        'https://www.rittmananalytics.com/blog/2023/10/12/rittman-analytics-and-coalesce-2023-san-diego-well-be-there',
        'https://www.rittmananalytics.com/blog/2023/6/18/wednesday-webinar-series-how-rittman-analytics-builds-modern-data-stacks-using-cube-preset-and-dagster-wednesday-28th-june-2023',
        'https://www.rittmananalytics.com/blog/2023/5/23/upcoming-30-minute-webinar-how-rittman-analytics-automates-dbt-looker-and-cubedev-project-delivery-presented-by-lewis-baker',
        'https://www.rittmananalytics.com/blog/2023/2/20/cube-semantic-layer-webinar',
        'https://www.rittmananalytics.com/blog/2024/7/10/making-smart-buildings-smarter-for-facility-solutions-group-with-embeddable',
        'https://www.rittmananalytics.com/blog/2024/4/25/dynamic-data-model-definition-in-cube-using-python-and-jinja',
        'https://www.rittmananalytics.com/blog/2023/2/24/building-up-a-semantic-layer-with-dbt-metrics-cube-and-droughty',
        'https://www.rittmananalytics.com/blog/2022/12/21/customer-first-order-segmentation-using-looker-and-google-bigquery',
        'https://www.rittmananalytics.com/blog/2024/4/30/rittman-analytics-achieves-the-data-analytics-partner-specialization-in-google-cloud-partner-advantage',
        'https://www.rittmananalytics.com/blog/2024/3/17/data-lineage-for-your-google-bigquery-dbt-and-cloud-composer-data-pipelines-using-dataplex-and-data-catalog',
        'https://www.rittmananalytics.com/blog/2023/09/28/google-cloud-cortex-framework',
        'https://www.rittmananalytics.com/blog/2021/7/25/event-based-analytics-and-bigquery-export-comes-to-google-analytics-4-how-does-it-worknbsp-and-whats-thenbspcatch',
        'https://www.rittmananalytics.com/blog/2021/1/09/new-121-release-of-ra-data-warehouse-for-dbt-fivetran-bigquery-segment-and-now-snowflake-dw',
        'https://www.rittmananalytics.com/blog/2020/5/20/happy-10th-birthday-google-bigquery-our-preferred-cloud-data-warehousing-platform',
        'https://www.rittmananalytics.com/blog/2020/3/6/bigquery-ios-dbt-numerics',
        'https://www.rittmananalytics.com/blog/2019/4/14/supermetrics-google-bigquery-and-data-pipelines-for-digital-marketers',
        'https://www.rittmananalytics.com/blog/2016/11/19/2016-11-19-google-bigquery-and-why-big-data-is-about-to-have-its-gmail-moment',
        'https://www.rittmananalytics.com/blog/2024/4/4/the-rittman-analytics-guide-to-modernising-your-data-stack-innbsp2024',
        'https://www.rittmananalytics.com/blog/2024/3/4/looker-benchmarking-financial-analytics',
        'https://www.rittmananalytics.com/blog/2023/5/25/london-to-brighton-bike-ride-2023-fundraising-for-dementia-uk',
        'https://www.rittmananalytics.com/blog/2023/4/6/building-a-mobile-friendly-kpi-dashboard-using-looker-studio-integration-with-looker-universal-semantic-model',
        'https://www.rittmananalytics.com/blog/2022/5/5/presenting-on-dbt-amp-firebolt-at-the-budapest-dbt-meetup-tuesday-10th-may-2022',
        'https://www.rittmananalytics.com/blog/2022/2/1/lightdash-looker-and-dbt-as-the-bi-tool-metrics-layer',
        'https://www.rittmananalytics.com/blog/2021/12/16/using-looker-to-analyze-and-visualise-your-customer-concentration',
        'https://www.rittmananalytics.com/blog/2021/11/21/adding-forecasting-to-your-looker-reports-and-dashboards',
        'https://www.rittmananalytics.com/blog/2021/6/20/rfm-analysis-and-customer-segmentation-using-looker-dbt-and-google-bigquery',
        'https://www.rittmananalytics.com/blog/2021/2/22/customer-cohorting-retention-curves-and-predictive-lifetime-value-using-looker-and-google-bigquery',
        'https://www.rittmananalytics.com/blog/2020/1/7/forecasting-hubspot-deal-revenue-and-resourcing-needs-using-dbt-and-looker',
        'https://www.rittmananalytics.com/blog/2023/12/8/how-rittman-analytics-does-web-marketing-anaytics',
        'https://www.rittmananalytics.com/blog/2023/5/9/building-your-own-ga4-rules-based-marketing-attribution-models-using-google-bigquery-andnbsplooker',
        'https://www.rittmananalytics.com/blog/2022/12/02/improving-wordpress-search-keyword-performance-using-looker-google-search-console-and-fivetran',
        'https://www.rittmananalytics.com/blog/2023/2/18/medium-squarespace-or-githubnbsp-content-marketing-value-analytics-using-looker-dbt-and-segment',
        'https://www.rittmananalytics.com/blog/2022/5/30/stitching-identity-across-the-customer-journey-using-segment-google-bigquery-and-looker',
        'https://www.rittmananalytics.com/blog/2022/2/20/rudderstack-snowplow-and-open-source-cdp-alternatives-to-segment',
        'https://www.rittmananalytics.com/blog/2021/2/15/customer-360-degree-analysis-and-hightouch',
        'https://www.rittmananalytics.com/blog/2020/9/19/ad-spend-and-campaign-roi-analytics-using-segment-looker-dbt-and-googlenbspbigquery',
        'https://www.rittmananalytics.com/blog/2020/7/16/connecting-intercom-to-segment-personas-for-more-relevant-and-cost-effective-customer-services-agents',
        'https://www.rittmananalytics.com/blog/2020/2/8/multichannel-attribution-bigquery-dbt-looker-segment',
        'https://www.rittmananalytics.com/blog/2019/11/6/e058gepqwiyyx4mop3eb06mn9ckjkp',
        'https://www.rittmananalytics.com/blog/2019/5/22/0r1fgtifyovghse903ha3vwdwwbp7j',
        'https://www.rittmananalytics.com/blog/2022/4/25/analyzing-the-hacker-news-public-dataset-using-firebolt-data-warehouse-and-looker',
        'https://www.rittmananalytics.com/blog/2020/9/21/newlookforlooker7',
        'https://www.rittmananalytics.com/blog/2020/6/4/drill-to-detail-ep82-looker-development-automated-testing-and-spectacles-with-special-guest-josh-temple',
        'https://www.rittmananalytics.com/blog/2020/6/3/column-level-data-profiling-for-google-bigquery-datasets-using-dbt',
        'https://www.rittmananalytics.com/blog/2020/4/26/coronavirus-ncf',
        'https://www.rittmananalytics.com/blog/2020/1/3/modelling-slowly-changing-dimensions-type-23-and-6-using-dbt-and-looker',
        'https://www.rittmananalytics.com/blog/2019/8/19/hubspot-data-actions-harvest-analytical-workflows-and-looker-data-platform',
        'https://www.rittmananalytics.com/blog/2019/7/18/new-features-in-looker-616-conditional-alerts-beta-content-curation-beta-and-lookml-ide-folders',
        'https://www.rittmananalytics.com/blog/2019/7/7/news-rittman-analytics-is-now-a-uk-consulting-partner-for-dbt-data-build-tool',
        'https://www.rittmananalytics.com/blog/2019/07/01/news-on-the-second-london-looker-developer-meetup-10th-july-2019-at-gocardless-london',
        'https://www.rittmananalytics.com/blog/2019/6/12/rittman-analytics-is-now-a-segment-certified-implementation-partner',
        'https://www.rittmananalytics.com/blog/2019/6/10/continuous-integration-feature-branches-and-automated-build-tests-using-dbtcloud',
        'https://www.rittmananalytics.com/blog/2023/11/12/how-rittman-analytics-builds-data-stacks-for-growth-stage-businesses-using-cube-dagster-and-preset',
        'https://www.rittmananalytics.com/blog/2022/12/06/10-ways-your-modern-data-stack-project-can-fail',
        'https://www.rittmananalytics.com/blog/2022/5/30/how-rittman-analytics-does-analytics-part-2-building-our-modern-data-stack-using-dbt-google-bigquery-looker-segment-and-rudderstack',
        'https://www.rittmananalytics.com/blog/2021/3/12/customer-data-warehouses-are-the-new-customer-data-platform',
        'https://www.rittmananalytics.com/blog/2021/1/17/deduplicating-dbt-saas-data-warehousing',
        'https://www.rittmananalytics.com/blog/2020/5/27/introducing-the-ra-warehouse-dbt-framework-how-rittman-analytics-does-data-centralization',
        'https://www.rittmananalytics.com/blog/2019/5/7/how-rittman-analytics',
        'https://www.rittmananalytics.com/blog/2024/6/25/oracle-and-google-cloud-partnership',
        'https://www.rittmananalytics.com/blog/2018/11/17/2018-11-17-five-thoughts-about-thomas-kurians-move-to-google-cloud-platform',
        'https://www.rittmananalytics.com/blog/2016/09/29/2016-09-29-oracles-big-data-platform-goes-cloud-becomes-elastic-and-suddenly-looks-very-interesting',
        'https://www.rittmananalytics.com/blog/2016/09/25/2016-09-25-new-oracle-magazine-article-on-oracle-big-data-spatial-graph-for-social-network-analysis',
        'https://www.rittmananalytics.com/blog/2016/09/25/2016-09-25-obiee12c-pushing-up-daisies-or-more-relevant-than-ever-in-the-world-of-bimodal-it',
        'https://www.rittmananalytics.com/blog/2024/4/23/data-analytics-project-planning-checklistthe-definitive-guide-to-planning-your-data-analytics-initiative',
        'https://www.rittmananalytics.com/blog/2023/12/29/end-of-year-special-best-of-the-rittman-analytics-blog-2023-pdf-ebook',
        'https://www.rittmananalytics.com/blog/2019/12/15/segmentcdpemailtracking',
        'https://www.rittmananalytics.com/blog/2019/8/4/financial-reporting-in-looker-using-g-accon-for-xero-bigquery-and-dbt',
        'https://www.rittmananalytics.com/blog/2019/10/7/presenting-on-oracle-autonomous-data-warehouse-cloud-and-looker-at-the-uk-oracle-user-group-analytics-modernisation-forum-8th-october-2019',
        'https://www.rittmananalytics.com/blog/2019/5/27/drill-to-detail-ep66-etl-incorta-and-the-death-of-the-star-schema-with-special-guest-matthew-halliday',
        'https://www.rittmananalytics.com/blog/2019/4/5/2019-4-5-join-us-at-looker-join-2019-london-on-april-9th-the-brewery-52-chiswell-street',
        'https://www.rittmananalytics.com/blog/2019/04/01/2019-4-1-released-today-the-mome-project-multiple-olap-machine-emulator',
        'https://www.rittmananalytics.com/blog/2019/3/11/mjr-analytics-is-now-rittman-analytics-and-an-update-on-our-first-six-months',
        'https://www.rittmananalytics.com/blog/2019/03/05/2019-3-5-drill-to-detail-podcast-returns-with-ep60-a-deeper-look-into-looker-with-special-guest-lloyd-tabb',
        'https://www.rittmananalytics.com/blog/2018/11/11/2018-11-11-digital-analytics-bi-and-big-data-meetup-in-copenhagen-22111967',
        'https://www.rittmananalytics.com/blog/2018/10/22/2018-10-22-mjr-analytics-presenting-at-oracle-openworld-2018-san-francisco',
        'https://www.rittmananalytics.com/blog/2018/09/17/2018-09-17-introducing-mjr-analytics-and-how-two-years-go-so-fast-when-youre-learning-something-new',
        'https://www.rittmananalytics.com/blog/2018/08/27/2018-08-27-date-partitioning-and-table-clustering-in-google-bigquery-and-looker-pdts',
        'https://www.rittmananalytics.com/blog/2018/06/02/2018-06-02-oracle-big-data-cloud-event-hub-and-analytics-cloud-data-lake-edition-pt-3',
        'https://www.rittmananalytics.com/blog/2018/05/28/2018-05-28-oracle-big-data-cloud-event-hub-and-analytics-cloud-data-lake-edition-pt-2',
        'https://www.rittmananalytics.com/blog/2018/05/10/2018-05-10-oracle-big-data-cloud-event-hub-and-analytics-cloud-data-lake-edition-pt-1',
        'https://www.rittmananalytics.com/blog/2018/05/01/2018-05-02-using-looker-data-actions-to-make-monzo-spend-analysis-more-interactive-and-actionable',
        'https://www.rittmananalytics.com/blog/2018/04/27/2018-04-27-updates-to-oracle-analytics-cloud-oracle-biee-12c-and-oracle-dv-desktop',
        'https://www.rittmananalytics.com/blog/2018/04/21/2018-04-21-connecting-looker-to-oracle-autonomous-data-warehouse-cloud',
        'https://www.rittmananalytics.com/blog/2018/04/16/2018-04-16-first-impressions-of-oracle-autonomous-data-warehouse-cloud',
        'https://www.rittmananalytics.com/blog/2018/04/03/2018-04-03-timeline-charts-derived-tables-and-analytic-functions-in-looker-5',
        'https://www.rittmananalytics.com/blog/2018/03/05/2018-03-05-the-drill-to-detail-podcast-50th-episode-special-and-top-10-episodes-by-download',
        'https://www.rittmananalytics.com/blog/2018/02/05/2018-02-05-monzo-bigquery-looker-and-fintech-the-other-london-tech-startup-scene',
        'https://www.rittmananalytics.com/blog/2017/12/31/2017-12-31-oracle-analytics-cloud-and-the-welcome-return-of-the-enterprise-bi-platform',
        'https://www.rittmananalytics.com/blog/2017/12/03/2017-12-03-ukoug-tech17-and-the-incredible-world-of-ecommerce-analytics-machine-learning-and-11-marketing',
        'https://www.rittmananalytics.com/blog/2017/11/24/2017-11-24-query-federation-comes-to-looker-5-with-new-data-merge-feature',
        'https://www.rittmananalytics.com/blog/2017/11/24/2017-11-24-google-cloud-dataprep-spreadsheet-style-data-wrangling-powered-by-google-cloud-dataflow',
        'https://www.rittmananalytics.com/blog/2017/10/21/2017-10-21-druid-imply-and-looker-5-bring-olap-analysis-to-bigquerys-data-warehouse',
        'https://www.rittmananalytics.com/blog/2017/07/31/2017-07-31-using-google-bigquery-google-cloud-natural-language-api-and-looker-to-work-out-exactly-how-much',
        'https://www.rittmananalytics.com/blog/2017/06/10/2017-06-10-google-bigquery-large-table-joins-and-how-nested-repeated-values-and-the-capacitor-storage-format',
        'https://www.rittmananalytics.com/blog/2017/05/29/2017-05-29-analytic-views-oracle-database-12-2',
        'https://www.rittmananalytics.com/blog/2017/05/01/2017-05-01-what-bi-development-looks-like-with-bigquery-google-cloud-apis-looker-and-fluentd-courtesy-of',
        'https://www.rittmananalytics.com/blog/2017/02/23/2017-02-23-slides-from-my-new-world-hadoop-architectures-what-problems-they-really-solve-for-oracle-dbas',
        'https://www.rittmananalytics.com/blog/2017/02/21/2017-02-21-bigquery-looker-and-big-datas-rediscovery-of-data-warehousing-and-semantic-models-at-google',
        'https://www.rittmananalytics.com/blog/2017/02/03/2017-02-03-graph-analysis-in-the-how-a-tweet-went-viral-conference-presentation',
        'https://www.rittmananalytics.com/blog/2017/01/24/2017-01-24-qubits-journey-to-petabyte-scale-machine-learning-and-analytics-on-google-cloud-platform-and',
        'https://www.rittmananalytics.com/blog/2016/12/31/2016-12-31-drill-to-detail-podcast-looking-back-at-2016-and-whats-new-and-planned-for-2017',
        'https://www.rittmananalytics.com/blog/2016/12/02/2016-12-02-data-lakes-at-google-scale-the-end-of-meaningless-customer-experiences-and-ukoug-tech16-in',
        'https://www.rittmananalytics.com/blog/2016/10/30/2016-10-30-data-capital-competitive-strategy-and-the-economics-of-big-data-drill-to-detail-podcast-ep-6',
        'https://www.rittmananalytics.com/blog/2016/10/17/2016-10-17-slides-from-the-story-behind-the-11hr-cup-of-tea-wifi-kettles-how-it-was-all-about-data',
        'https://www.rittmananalytics.com/blog/2016/10/16/2016-10-16-interested-in-oracle-big-data-ml-and-next-gen-analytics-in-the-enterprise',
        'https://www.rittmananalytics.com/blog/2016/10/13/2016-10-13-the-ikettle-the-eleven-hour-struggle-to-make-a-cup-of-tea-and-why-it-was-all-about-data',
        'https://www.rittmananalytics.com/blog/2016/10/05/2016-10-05-drill-to-detail-ep',
        'https://www.rittmananalytics.com/blog/2016/09/25/2016-09-25-presenting-second-in-the-gluent-new-world-webinar-series-on-sql-on-hadoop-concepts-and',
        'https://www.rittmananalytics.com/blog/2016/09/25/2016-09-25-last-stop-budapest-and-five-new-bi-and-analytics-technologies-coming-soon-for-hadoop',
        'https://www.rittmananalytics.com/blog/2016/09/25/2016-09-25-from-lots-of-reports-with-some-data-analysis-to-massive-data-analysis-with-some-reporting',
        'https://www.rittmananalytics.com/blog/2016/09/24/2016-09-24-building-predictive-analytics-models-against-wearables-smart-home-and-smartphone-app-data-heres',
        'https://www.rittmananalytics.com/blog/2016/09/24/2016-09-24-podcast-episode-1-now-live-drill-to-detail-with-mark-rittman-with-special-guest-stewart-bryson',
        # Add more URLs here
    ]


    # Create a session with retry logic
    session = requests_retry_session()

    # Scrape each URL and store the results
    results = []
    max_retries = 3
    # dict.fromkeys() drops repeated URLs while keeping their original order,
    # so a page listed twice is fetched and written to the CSV only once.
    for url in dict.fromkeys(urls):
        print(f"Scraping {url}...")
        for attempt in range(max_retries):
            try:
                result = scrape_squarespace_blog(url, session)
            except Exception as e:
                # Unexpected (non-request) failure while scraping this page.
                problem = f"An error occurred: {e}"
            else:
                if result:
                    results.append(result)
                    print("Done!")
                    break
                # The scraper reports request failures by returning None rather
                # than raising, so that case must also back off before retrying.
                problem = f"No data returned for {url}"

            if attempt < max_retries - 1:
                wait_time = random.uniform(1, 3)
                print(f"{problem}. Retrying in {wait_time:.2f} seconds...")
                time.sleep(wait_time)
        else:
            # Reached only when no attempt hit "break", i.e. every attempt failed.
            print(f"Failed to scrape {url} after {max_retries} attempts.")

    # Save results to a CSV file
    if results:
        csv_filename = f'scraped_blogs_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
        with open(csv_filename, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['url', 'title', 'author', 'date', 'content']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()
            writer.writerows(results)

        print(f"Results saved to {csv_filename}")

        # Display the first result
        print("\nFirst scraped article:")
        for key, value in results[0].items():
            if key == 'content':
                print(f"{key}: {value[:500]}...")  # Print only first 500 characters of content
            else:
                print(f"{key}: {value}")
    else:
        print("No results were successfully scraped.")