Skip to content

Instantly share code, notes, and snippets.

@Podbrushkin
Created January 15, 2025 14:45
Show Gist options
  • Save Podbrushkin/e91bc33fc1d1b0ca66c9a835d8189d11 to your computer and use it in GitHub Desktop.
Save Podbrushkin/e91bc33fc1d1b0ca66c9a835d8189d11 to your computer and use it in GitHub Desktop.
Kinopoisk Download Votes

PowerShell 7

# Parses every *.html file in the current directory (saved Kinopoisk vote pages)
# and writes one tab-separated row per rated item to myVotes.tsv:
#   num, href, title, titleEn, date, vote
# Rows are sorted numerically by the first column (num).
# Requires PowerShell 7+ because of the ternary operator (?:).
Get-ChildItem *.html | ForEach-Object {
	# -LiteralPath: a positional argument binds to -Path, which expands wildcards,
	# so a file named like "votes [1].html" would not be found.
	$html = Get-Content -Raw -LiteralPath $_.FullName

	# An item is a '<div class="item...' element that starts on a line indented by
	# exactly 32 spaces and runs up to the closing </div> at the same indent.
	$items = $html |
		Select-String '(?smi)\n {32}(<div class="item.*?)\n {32}</div>' -AllMatches |
		ForEach-Object Matches |
		ForEach-Object { $_.Groups[1].Value }

	$items | ForEach-Object {
		# Each field falls back to $null (an empty column after -join) when its div is absent.
		$num = $_ -match '.*<div class="num">(\d*)</div>.*' ? $Matches[1] : $null
		$href,$title = $_ -match '.*<div class="nameRus"><a href="(.*?)">(.*?)<.*' ? $Matches[1,2] : $null
		$titleEn = $_ -match '.*<div class="nameEng">(.*?)<.*' ? $Matches[1] : $null
		$date = $_ -match '.*<div class="date">(.*?)<.*' ? $Matches[1] : $null
		$vote = $_ -match '.*<div class="vote">(.*?)<.*' ? $Matches[1] : $null
		$num,$href,$title,$titleEn,$date,$vote -join "`t"
	} | ForEach-Object { [System.Web.HttpUtility]::HtmlDecode($_) }  # turn HTML entities into plain characters
} | Sort-Object { [int]($_ -split "`t")[0] } > myVotes.tsv

PowerShell 5

# Parses every *.html file in the current directory (saved Kinopoisk vote pages)
# and writes one tab-separated row per rated item to myVotes.tsv:
#   num, href, title, titleEn, date, vote
# Rows are sorted numerically by the first column (num).
# Written for Windows PowerShell 5: uses if/else instead of the ternary operator.
# NOTE: in Windows PowerShell the '>' operator writes the file as UTF-16 LE.
Get-ChildItem *.html | ForEach-Object {
	# -LiteralPath: a positional argument binds to -Path, which expands wildcards,
	# so a file named like "votes [1].html" would not be found.
	$html = Get-Content -Raw -Encoding UTF8 -LiteralPath $_.FullName

	# An item is a '<div class="item...' element that starts on a line indented by
	# exactly 32 spaces and runs up to the closing </div> at the same indent.
	$items = $html |
		Select-String '(?smi)\n {32}(<div class="item.*?)\n {32}</div>' -AllMatches |
		ForEach-Object Matches |
		ForEach-Object { $_.Groups[1].Value }

	$items | ForEach-Object {
		# Each field falls back to $null (an empty column after -join) when its div is absent.
		$num = if ($_ -match '.*<div class="num">(\d*)</div>.*') {$Matches[1]} else { $null }
		$href,$title = if ($_ -match '.*<div class="nameRus"><a href="(.*?)">(.*?)<.*') { $Matches[1,2]} else { $null,$null }
		$titleEn = if ($_ -match '.*<div class="nameEng">(.*?)<.*') {$Matches[1]} else { $null }
		$vote = if ($_ -match '.*<div class="vote">(.*?)<.*') {$Matches[1]} else { $null }
		$date = if ($_ -match '.*<div class="date">(.*?)<.*') {$Matches[1]} else { $null }
		$num,$href,$title,$titleEn,$date,$vote -join "`t"
	} | ForEach-Object {
		# Decode HTML entities (&amp;, &nbsp;, ...) so the TSV holds plain text.
		# System.Net.WebUtility is usable without Add-Type, unlike System.Web.HttpUtility.
		[System.Net.WebUtility]::HtmlDecode($_)
	}
} | Sort-Object { [int]($_ -split "`t")[0] } > myVotes.tsv
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment