Last active
December 13, 2020 19:44
-
-
Save hrlou/2a9584d917e5e63cb0c33f21b3f9b432 to your computer and use it in GitHub Desktop.
functions to get html, store it and its information in a structure and parse it
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Functions to get HTML, store it and its information in a structure, and parse it.
 * A lot of this was taken from
 * https://curl.se/libcurl/c/CURLOPT_WRITEFUNCTION.html
 * I just wanted to modify it a little for my own purposes.
 * Compile with "gcc curl_write-callback-and-parse.c -lcurl"
 */
#include <curl/curl.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <stdio.h> | |
/* In-memory store for a downloaded response body; filled by write_callback. */
struct curl_memory_store {
    char *data;  /* heap buffer with the bytes received so far; write_callback keeps it NUL-terminated */
    size_t size; /* number of bytes held in data, not counting the terminator */
};
typedef struct curl_memory_store curl_memory;
static size_t write_callback(char* buf, size_t size, size_t nmemb, void *pass) { | |
size_t realsize = size*nmemb; | |
// interpret the typless pass | |
curl_memory *mem = (curl_memory *)pass; | |
mem->data = realloc(mem->data, mem->size + realsize + 1); | |
// copy buf (which contains the data in the curl buffer) into the newly re-allocated pointer | |
memcpy(&(mem->data[mem->size]), buf, realsize); | |
// iterate the size by the number of bytes handled | |
mem->size += realsize; | |
mem->data[mem->size] = 0; | |
return realsize; | |
} | |
curl_memory get_html(char* input_url) { | |
curl_memory html; | |
html.size = 0; | |
CURL* curl_get_html; | |
// initialise | |
curl_get_html = curl_easy_init(); | |
curl_easy_setopt(curl_get_html, CURLOPT_URL, input_url); | |
// send the html data to the call back function | |
curl_easy_setopt(curl_get_html, CURLOPT_WRITEFUNCTION, &write_callback); | |
// give the callback function the file descriptor | |
curl_easy_setopt(curl_get_html, CURLOPT_WRITEDATA, (void *)&html); | |
curl_easy_perform(curl_get_html); | |
curl_easy_cleanup(curl_get_html); | |
return html; | |
} | |
/*
 * Example parser: writes the stored bytes to stdout, skipping every '<'
 * and '>' character, followed by the stored byte count and a newline.
 * Replace the filter with whatever processing you need.
 *
 * data - store to print; exactly data.size bytes of data.data are read,
 *        so the terminating NUL is not written. A NULL buffer is treated
 *        as empty.
 */
void parse(curl_memory data) {
    if (data.data != NULL) {
        for (size_t i = 0; i < data.size; i++) {
            char c = data.data[i];
            if (c != '<' && c != '>') {
                putchar(c);
            }
        }
    }
    /* size is a size_t, so it needs the %zu conversion */
    printf("%zu\n", data.size);
}
void main(void) { | |
// curl_memory html = get_html("https://hral.xyz/"); | |
parse(get_html("https://hral.xyz/")); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment