Skip to content

Instantly share code, notes, and snippets.

@hrlou
Last active December 13, 2020 19:44
Show Gist options
  • Save hrlou/2a9584d917e5e63cb0c33f21b3f9b432 to your computer and use it in GitHub Desktop.
Save hrlou/2a9584d917e5e63cb0c33f21b3f9b432 to your computer and use it in GitHub Desktop.
Functions to get HTML, store it and its information in a structure, and parse it
/* Functions to get HTML, store it and its information in a structure, and parse it.
* A lot of this was taken from
* https://curl.se/libcurl/c/CURLOPT_WRITEFUNCTION.html
* I just wanted to modify it a little for my own purposes.
* Compile with "gcc curl_write-callback-and-parse.c -lcurl"
*/
#include <curl/curl.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/* In-memory buffer that accumulates the body of a transfer.
 * Both the struct tag and the curl_memory alias name the same type. */
typedef struct curl_memory_store {
    char *data;  /* heap buffer holding the bytes received so far */
    size_t size; /* number of payload bytes stored in data */
} curl_memory;
/* libcurl write callback: appends the chunk in buf to the curl_memory
 * buffer passed through the user pointer, keeping it NUL-terminated.
 *
 * buf   - data delivered by libcurl (not NUL-terminated)
 * size  - size of one element
 * nmemb - number of elements; the chunk is size * nmemb bytes long
 * pass  - user pointer set with CURLOPT_WRITEDATA; must point to a curl_memory
 *
 * Returns the number of bytes consumed. When the buffer cannot be grown it
 * returns 0, which libcurl treats as a write error for a non-empty chunk and
 * aborts the transfer; the previously stored data stays valid in that case.
 */
static size_t write_callback(char *buf, size_t size, size_t nmemb, void *pass) {
    size_t realsize = size * nmemb;
    // interpret the typeless pass pointer
    curl_memory *mem = pass;

    // grow through a temporary so the old buffer is not lost if realloc fails
    char *grown = realloc(mem->data, mem->size + realsize + 1);
    if (grown == NULL) {
        return 0;
    }
    mem->data = grown;

    // copy the chunk to the end of the data stored so far
    memcpy(&(mem->data[mem->size]), buf, realsize);
    // advance the size by the number of bytes handled
    mem->size += realsize;
    // keep the buffer usable as a C string
    mem->data[mem->size] = 0;
    return realsize;
}
curl_memory get_html(char* input_url) {
curl_memory html;
html.size = 0;
CURL* curl_get_html;
// initialise
curl_get_html = curl_easy_init();
curl_easy_setopt(curl_get_html, CURLOPT_URL, input_url);
// send the html data to the call back function
curl_easy_setopt(curl_get_html, CURLOPT_WRITEFUNCTION, &write_callback);
// give the callback function the file descriptor
curl_easy_setopt(curl_get_html, CURLOPT_WRITEDATA, (void *)&html);
curl_easy_perform(curl_get_html);
curl_easy_cleanup(curl_get_html);
return html;
}
/* Example parser: writes the buffer to stdout with every '<' and '>'
 * removed, then prints the buffer size on a line of its own.
 *
 * data - buffer to print; only the first data.size bytes are read, so the
 *        terminating NUL is not written. A NULL data.data prints only the size.
 */
void parse(curl_memory data) {
    if (data.data != NULL) {
        for (size_t i = 0; i < data.size; i++) {
            /* just an example parse to remove the less than/greater than symbols
               do whatever you want with this */
            if (data.data[i] != '<' && data.data[i] != '>') {
                putchar(data.data[i]);
            }
        }
    }
    // size_t needs %zu; %d is undefined behaviour
    printf("%zu\n", data.size);
}
void main(void) {
// curl_memory html = get_html("https://hral.xyz/");
parse(get_html("https://hral.xyz/"));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment