hrlou · December 13, 2020 19:44
diff --git a/curl_write-callback-and-parse.c b/curl_write-callback-and-parse.c
 /*  functions to get html, store it and its information in a structure and parse it.
 *  a lot of this was taken from
 *  https://curl.se/libcurl/c/CURLOPT_WRITEFUNCTION.html
 *  I just wanted to modify it a little for my own purposes
 *  compile with "gcc curl_write-callback-and-parse.c -lcurl"
 */


 #include <curl/curl.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>

 /* Growable heap buffer that accumulates a response body. */
 struct curl_memory_store {
    char *data;   // bytes received so far, kept NUL-terminated by write_callback
    size_t size;  // number of bytes in data, not counting the terminator
 };

 typedef struct curl_memory_store curl_memory;

 /*
  * libcurl CURLOPT_WRITEFUNCTION callback: append one chunk to a curl_memory.
  *
  * buf   - chunk handed over by libcurl (not NUL-terminated)
  * size  - size of one element in buf
  * nmemb - number of elements in buf
  * pass  - the CURLOPT_WRITEDATA pointer; must point to a curl_memory whose
  *         data member is NULL or a pointer that may be passed to realloc()
  *
  * Returns the number of bytes stored. libcurl treats any value other than
  * size*nmemb as an error and aborts the transfer, so 0 is returned when the
  * buffer cannot be grown; the bytes stored so far are left untouched.
  */
 static size_t write_callback(char* buf, size_t size, size_t nmemb, void *pass) {
    // interpret the typeless pass
    curl_memory *mem = (curl_memory *)pass;
    size_t realsize;
    char *grown;

    // refuse chunks whose byte count (plus the terminator) would wrap size_t
    if (size != 0 && nmemb > (size_t)-1 / size) {
        return 0;
    }
    realsize = size * nmemb;
    if (realsize >= (size_t)-1 - mem->size) {
        return 0;
    }

    // grow through a temporary so the old buffer is not lost if realloc fails
    grown = realloc(mem->data, mem->size + realsize + 1);
    if (grown == NULL) {
        return 0;
    }
    mem->data = grown;

    // copy buf (the data in the curl buffer) behind the bytes already stored
    memcpy(&(mem->data[mem->size]), buf, realsize);
    // advance the size by the number of bytes handled
    mem->size += realsize;
    mem->data[mem->size] = 0;
    return realsize;
 }

 curl_memory get_html(char* input_url) {
    curl_memory html;
    html.size = 0;
    CURL* curl_get_html;
    // initialise
    curl_get_html = curl_easy_init();
    curl_easy_setopt(curl_get_html, CURLOPT_URL, input_url);
    // send the html data to the call back function
    curl_easy_setopt(curl_get_html, CURLOPT_WRITEFUNCTION, &write_callback);
    // give the callback function the file descriptor 
    curl_easy_setopt(curl_get_html, CURLOPT_WRITEDATA, (void *)&html);
    curl_easy_perform(curl_get_html);
    curl_easy_cleanup(curl_get_html);
    return html;
 }

 /*
  * Example parse: write the buffer to stdout with every '<' and '>' removed,
  * followed by the byte count and a newline.
  *
  * data - buffer to print; data.data may be NULL, in which case only the
  *        count is printed
  */
 void parse(curl_memory data) {
    if (data.data != NULL) {
        // stop before data.size so the NUL terminator is not written out
        for (size_t i = 0; i < data.size; i++) {
            /*  just an example parse to remove the less than/greater than symbols
                do whatever you want with this */
            if (data.data[i] != '<' && data.data[i] != '>') {
                putchar(data.data[i]);
            }
        }
    }
    // size_t must be printed with %zu; %d is undefined behaviour here
    printf("%zu\n", data.size);
 }

 void main(void) {
    // curl_memory html = get_html("https://hral.xyz/");
    parse(get_html("https://hral.xyz/"));
 }
	/* functions to get html, store it and its information in a structure and parse it.
	* a lot of this was taken from
	* https://curl.se/libcurl/c/CURLOPT_WRITEFUNCTION.html
	* I just wanted to modify it a little for my own purposes
	* compile with "gcc curl_write-callback-and-parse.c -lcurl"
	*/


	#include <curl/curl.h>
	#include <stdlib.h>
	#include <string.h>
	#include <stdio.h>

	/* Growable heap buffer that accumulates a response body. */
	struct curl_memory_store {
		char *data;   // bytes received so far, kept NUL-terminated by write_callback
		size_t size;  // number of bytes in data, not counting the terminator
	};

	typedef struct curl_memory_store curl_memory;

	/*
	 * libcurl CURLOPT_WRITEFUNCTION callback: append one chunk to a curl_memory.
	 *
	 * buf   - chunk handed over by libcurl (not NUL-terminated)
	 * size  - size of one element in buf
	 * nmemb - number of elements in buf
	 * pass  - the CURLOPT_WRITEDATA pointer; must point to a curl_memory whose
	 *         data member is NULL or a pointer that may be passed to realloc()
	 *
	 * Returns the number of bytes stored. libcurl treats any value other than
	 * size*nmemb as an error and aborts the transfer, so 0 is returned when the
	 * buffer cannot be grown; the bytes stored so far are left untouched.
	 */
	static size_t write_callback(char* buf, size_t size, size_t nmemb, void *pass) {
		// interpret the typeless pass (it is a pointer to the caller's buffer)
		curl_memory *mem = (curl_memory *)pass;
		size_t realsize;
		char *grown;

		// refuse chunks whose byte count (plus the terminator) would wrap size_t
		if (size != 0 && nmemb > (size_t)-1 / size) {
			return 0;
		}
		realsize = size * nmemb;
		if (realsize >= (size_t)-1 - mem->size) {
			return 0;
		}

		// grow through a temporary so the old buffer is not lost if realloc fails
		grown = realloc(mem->data, mem->size + realsize + 1);
		if (grown == NULL) {
			return 0;
		}
		mem->data = grown;

		// copy buf (the data in the curl buffer) behind the bytes already stored
		memcpy(&(mem->data[mem->size]), buf, realsize);
		// advance the size by the number of bytes handled
		mem->size += realsize;
		mem->data[mem->size] = 0;
		return realsize;
	}

	curl_memory get_html(char* input_url) {
	curl_memory html;
	html.size = 0;
	CURL* curl_get_html;
	// initialise
	curl_get_html = curl_easy_init();
	curl_easy_setopt(curl_get_html, CURLOPT_URL, input_url);
	// send the html data to the call back function
	curl_easy_setopt(curl_get_html, CURLOPT_WRITEFUNCTION, &write_callback);
	// give the callback function the file descriptor
	curl_easy_setopt(curl_get_html, CURLOPT_WRITEDATA, (void *)&html);
	curl_easy_perform(curl_get_html);
	curl_easy_cleanup(curl_get_html);
	return html;
	}

	/*
	 * Example parse: write the buffer to stdout with every '<' and '>' removed,
	 * followed by the byte count and a newline.
	 *
	 * data - buffer to print; data.data may be NULL, in which case only the
	 *        count is printed
	 */
	void parse(curl_memory data) {
		if (data.data != NULL) {
			// stop before data.size so the NUL terminator is not written out
			for (size_t i = 0; i < data.size; i++) {
				/* just an example parse to remove the less than/greater than symbols
				do whatever you want with this */
				if (data.data[i] != '<' && data.data[i] != '>') {
					putchar(data.data[i]);
				}
			}
		}
		// size_t must be printed with %zu; %d is undefined behaviour here
		printf("%zu\n", data.size);
	}

	void main(void) {
	// curl_memory html = get_html("https://hral.xyz/");
	parse(get_html("https://hral.xyz/"));
	}
No results found