
@yurukov
Last active April 16, 2025 02:45
  • Save yurukov/1273e8eca40ebfce695bd2da144ef856 to your computer and use it in GitHub Desktop.
Important notes:
1. Please run this only at night, when load on the server is low.
2. It downloads all index files and documents from the open data portal.
3. The script saves the time of your last download and fetches only files marked as updated after that time.
4. The expected total download size is about 6 GB.
Steps:
1. Open https://kais.cadastre.bg/bg/OpenData
2. Open DevTools.
3. Find the second Fetch/XHR call to https://kais.cadastre.bg/bg/OpenData/Read (the one with a payload).
4. Copy it as a curl command. It should end with a '--data-raw' argument.
5. Paste that command between the two 'endOfCommand' markers in the script, replacing the command there.
6. Run the script with bash.
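Before pasting, you can sanity-check the copied request. This is a minimal sketch, and the scratch file name cmd.txt is an assumption (the script itself embeds the command in a heredoc, not a file):

```shell
# Hypothetical pre-check: the copied request must carry a --data-raw
# body, because the script's sed edits rely on finding that flag.
# The sample command below stands in for whatever DevTools produced.
cat > cmd.txt <<'EOF'
curl 'https://kais.cadastre.bg/bg/OpenData/Read' \
--data-raw 'path=x&target=x'
EOF
if grep -q -- '--data-raw' cmd.txt; then
  echo "command looks complete"
else
  echo "missing --data-raw; re-copy the request" >&2
fi
```

If the check fails, you most likely copied the first (payload-less) request instead of the second one.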
#!/bin/bash
mkdir -p index
mkdir -p doc
read -r -d '' curlcom <<- 'endOfCommand'
curl 'https://kais.cadastre.bg/bg/OpenData/Read' \
-H 'Accept: */*' \
-H 'Accept-Language: en-US,en;q=0.9,bg;q=0.8,ar;q=0.7' \
-H 'Cache-Control: no-cache' \
-H 'Connection: keep-alive' \
-H 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8' \
-H $'Cookie: csrf=CfDJ8N1Zr3r-DYZInfQcXSB9Jdu8vN8S8SGkP1qZza2NCmsFm5e2K6Fqk__Y7nEhFe7GlVf2T4jAN7wotc3YahNLDsl7DQsmAjDIXWRPcSVcrgdYXtdKTPT-Q8NC5PtXXfr8Mxj_iOvsoSMk8_aoYKgAmDo; s=CfDJ8N1Zr3r%2BDYZInfQcXSB9JdvuUfWX5HmqQ80Wn2TaUjH6rSaEey18vJJyxlHffxTFXA2dhWjzbUBHu6jHhErk0QJLbWaQYQ5M%2Bzlq%2FlZm6aXZ4E3mHielNAVyE%2FC9WagpR4pcKX2eRdFonA8kyP%2Fl3Pu1%2FYXo45KBan%2BsD6kPe7KE; cookiesgdpr=true; k-cookie=\u0021AIkeD5f4KbhJnBpYaued4ZpDLVf3ZOkIjGNvE1vFIuGRnTXNpeucsLcldRWeMKVzxd2MILZzOEpPLA==; TS01d36efd=01700d78c4508b08d4a924bc26e8ccb01c545be2a2aadcfc225788282cb01aeb5f1ff2976041b57bf22c07dfcf632dd11cd74206fec19d928f1e5a3e8b51c109beff9ae578' \
-H 'Origin: https://kais.cadastre.bg' \
-H 'Pragma: no-cache' \
-H 'Referer: https://kais.cadastre.bg/bg/OpenData' \
-H 'Sec-Fetch-Dest: empty' \
-H 'Sec-Fetch-Mode: cors' \
-H 'Sec-Fetch-Site: same-origin' \
-H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36' \
-H 'X-CSRF-TOKEN: CfDJ8IHdlVPWlvJIkB5g9NxdbAcjm-y5YozDYReNq5ky0i0n6DQ5BLyvSqqkAjL6v-aVYNQva8Cy___6BScVfsjrYd4p3myBVYe0fpHB3WCinqysQZjulNeAn8l45J7Pb6oDCy5UHnwHubQWYbLA2Bc3EbY' \
-H 'X-Requested-With: XMLHttpRequest' \
-H 'sec-ch-ua: "Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"' \
-H 'sec-ch-ua-mobile: ?0' \
-H 'sec-ch-ua-platform: "Linux"' \
--data-raw 'path=%D0%BE%D0%B1%D0%BB%D0%B0%D1%81%D1%82+%D0%91%D1%83%D1%80%D0%B3%D0%B0%D1%81&target=%D0%BE%D0%B1%D0%BB%D0%B0%D1%81%D1%82+%D0%91%D1%83%D1%80%D0%B3%D0%B0%D1%81'
endOfCommand
# Add -s and strip the '--data-raw ...' tail: the base command gets its
# POST body per request via --data-urlencode below.
curlcom=`echo "$curlcom" | sed 's_curl_curl -s_;$s_--data-raw.*__'`;
# Variant with an empty body (Content-Length: 0) for the initial region list.
curlcom1=`echo "$curlcom" | sed 's_Content-Type:.*_Content-Length: 0'"'"' \\\\_'`;
# On first run, pretend the last scrape was the epoch so everything downloads.
if [ ! -f LASTSCRAPE ]; then
echo "1970-01-01" > LASTSCRAPE;
fi
lastscrape=`cat LASTSCRAPE`;
echo "Last scrape was at $lastscrape";
lastscrape=`date -d "$lastscrape" +%s`;
echo "Downloading regions";
eval "$curlcom1 -X 'POST'" > index/obsh_ref.0.json;
echo -n "Downloading municipalities ";
# For each region path, fetch the list of municipalities.
jq -r '.[].Path' index/obsh_ref.0.json | { while read i; do
f=`echo "$i" | sed 's_област __;s_[()]\+__g'`;
eval "$curlcom --data-urlencode 'target=$i'" > "index/$f.1.json";
echo -n ".";
done }
echo " done";
echo -n "Downloading settlements ";
# For each municipality path, fetch the list of settlements.
jq -r '.[].Path' index/*.1.json | { while read i; do
f=`echo "$i" | sed 's_област \|община __g;s_[()]\+__g;s_/_-_g'`;
eval "$curlcom --data-urlencode 'target=$i'" > "index/$f.2.json";
echo -n ".";
done }
echo " done";
echo -n "Downloading document list ";
# For each settlement path, fetch its document list.
jq -r '.[].Path' index/*.2.json | { while read i; do
f=`echo "$i" | sed 's_област \|община \|с\. \|гр\. __g;s_ (\([0-9]\+\)_-\1_;s_[()]\+__g;s_/_-_g'`;
eval "$curlcom --data-urlencode 'target=$i'" > "index/$f.3.json";
echo -n ".";
done }
echo " done";
echo "Downloaded index files: "`ls index|wc -l`" data: "`du -sh index|sed 's_\s*index__'`;
echo -n "Downloading archives ";
# For each archive, compare its Modified date against the last scrape time;
# print "x" for skipped (unchanged) files and "." for downloaded ones.
jq -r '.[]|[.Path,.Modified]|@csv' index/*.3.json | { while read i; do
fd=`echo "$i" | sed 's_.*","__;s_"__g'`;
fd=`date -d "$fd" +%s`;
if [ $lastscrape -gt $fd ]; then
echo -n "x";
else
# Derive the local file name and the URL-encoded download path.
f=`echo "$i"| sed 's_",".*__;s_"__g;s_област \|община \|с\. \|гр\. __g;s_ (\([0-9]\+\)_-\1_;s_[()]\+__g;s_/_-_g'`;
d=`echo "$i"| sed 's_",".*__;s_"__g' | jq -sRr @uri | sed 's_\.zip.*_.zip_'`;
u=`echo "$curlcom" | sed 's_OpenData/Read_OpenData/Download?path='"$d"'_;$s_\$__'`;
eval "$u" > "doc/$f";
echo -n ".";
fi;
done }
echo " done";
echo "Downloaded doc files: "`ls doc|wc -l`" data: "`du -sh doc|sed 's_\s*doc__'`;
date +"%Y-%m-%d %H:%M" > LASTSCRAPE
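The archive loop above pivots on jq's @csv output. A minimal sketch with made-up sample data (not from the real portal) shows the line format the loop parses and how the Modified field is extracted:

```shell
# Made-up sample entry mimicking the portal's level-3 index format.
echo '[{"Path":"file.zip","Modified":"2025-03-15T10:30:00"}]' > sample.json
# Same filter as the script: one CSV line per archive.
jq -r '.[]|[.Path,.Modified]|@csv' sample.json
# Extract the Modified field the way the loop's sed expression does.
jq -r '.[]|[.Path,.Modified]|@csv' sample.json | sed 's_.*","__;s_"__g'
```

The first command prints `"file.zip","2025-03-15T10:30:00"`, and the sed step reduces that to `2025-03-15T10:30:00` for the date comparison.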
yurukov commented Feb 4, 2025

I've added a mechanism to download only updated files. If you run it in the same folder, it will update your existing list of archives. It still downloads the whole index list each time, however.
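The update check described here boils down to an epoch-seconds comparison, the same one the script does with GNU date. A minimal sketch with hypothetical dates:

```shell
# Hypothetical dates: last scrape vs. an archive's Modified timestamp.
lastscrape=$(date -d "2025-02-01" +%s)
modified=$(date -d "2025-03-15 10:30" +%s)
# The script skips the file only when it predates the last scrape.
if [ "$lastscrape" -gt "$modified" ]; then
  echo "x (skip)"
else
  echo ". (download)"
fi
```

With these sample dates the file is newer than the last scrape, so it prints ". (download)".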
