Skip to content

Instantly share code, notes, and snippets.

@groundcat
Created November 23, 2024 09:10
Show Gist options
  • Save groundcat/6d81a42e009468c24a80d1c57a7211a8 to your computer and use it in GitHub Desktop.
Save groundcat/6d81a42e009468c24a80d1c57a7211a8 to your computer and use it in GitHub Desktop.
import re
from typing import List, Tuple
def parse_line(line: str) -> Tuple[List[str], str]:
    """Build a sort key for one line of a public-suffix-style list.

    Args:
        line: A raw line, with or without surrounding whitespace/newline.

    Returns:
        A ``(labels, stripped_line)`` tuple. ``labels`` holds the rule's
        domain labels in reversed order (TLD first) with one leading ``!``
        exception marker and one leading ``*.`` wildcard prefix removed.
        For blank lines and ``//`` comment lines ``labels`` is empty.
        ``stripped_line`` is always the whitespace-stripped input.
    """
    line = line.strip()
    if not line or line.startswith('//'):
        return ([], line)

    # A rule is only read up to the first whitespace, so anything after it
    # must not leak into the sort key.
    rule = line.split(None, 1)[0]

    # Remove the markers as exact prefixes. str.lstrip() takes a *set* of
    # characters and would also eat repeated or interleaved '!', '*', '.'.
    if rule.startswith('!'):
        rule = rule[1:]
    if rule.startswith('*.'):
        rule = rule[2:]

    labels = rule.split('.')
    labels.reverse()
    return (labels, line)
def should_sort(line: str) -> bool:
    """Return True when *line* is a rule line that takes part in sorting.

    Blank (whitespace-only) lines and lines whose first non-whitespace
    characters are ``//`` are not sortable.
    """
    stripped = line.strip()
    # bool() keeps the declared return type: a bare `stripped and ...`
    # would hand back the empty string itself for blank lines.
    return bool(stripped) and not stripped.startswith('//')
def sort_block(block: List[str]) -> List[str]:
    """Sort the rule lines of *block* while pinning every other line in place.

    Lines rejected by ``should_sort`` (blank lines and ``//`` comments) keep
    their original index. The remaining lines are ordered by the label list
    from ``parse_line`` and written back, in that order, into the slots the
    rule lines originally occupied.

    Args:
        block: Lines belonging to one block.

    Returns:
        A new list of the same length as *block*.
    """
    movable = [entry for entry in block if should_sort(entry)]
    # list.sort is stable, so entries with equal keys keep their input order.
    movable.sort(key=lambda entry: parse_line(entry)[0])

    # Refill: each sortable slot takes the next sorted entry, every other
    # slot keeps the line that was already there.
    refill = iter(movable)
    return [next(refill) if should_sort(entry) else entry for entry in block]
def process_file(input_path: str, output_path: str):
    """Read a list file, sort each block with ``sort_block``, write the result.

    The input is read as UTF-8, trailing whitespace is removed from every
    line and a single newline is appended. A new block starts at each ``//``
    comment line, unless nothing has been collected yet. All blocks are
    sorted before the output file is opened.

    Any exception is caught and reported with ``print``; nothing is raised
    to the caller.

    Args:
        input_path: Path of the file to read.
        output_path: Path of the file to write.
    """
    try:
        with open(input_path, 'r', encoding='utf-8') as src:
            normalized = [raw.rstrip() + '\n' for raw in src]

        groups = []
        pending = []
        for text in normalized:
            # A comment line closes the group collected so far (if any)
            # and becomes the first line of the next one.
            if pending and text.strip().startswith('//'):
                groups.append(pending)
                pending = []
            pending.append(text)
        if pending:
            groups.append(pending)

        # Sort everything up front so a failure here leaves the output
        # file untouched.
        sorted_groups = [sort_block(group) for group in groups]

        with open(output_path, 'w', encoding='utf-8') as dst:
            dst.writelines(text for group in sorted_groups for text in group)
        print(f"Successfully processed {input_path} and wrote results to {output_path}")
    except Exception as e:
        print(f"Error processing file: {str(e)}")
if __name__ == "__main__":
    # Script entry point: sort the list found in the working directory and
    # write the result to a sibling file.
    input_file = "public_suffix_list.dat"
    output_file = "public_suffix_list_sorted.dat"
    process_file(input_path=input_file, output_path=output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment