Last active
February 27, 2022 09:19
-
-
Save bpanthi977/65d5364c4157ba25563e39cd67d8dc4f to your computer and use it in GitHub Desktop.
Create rclone filter file for given directory by parsing filter files recursively inside the given directory.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(require 'asdf) | |
(let ((*trace-output* (make-string-output-stream))) | |
(asdf:load-systems :alexandria :serapeum :iolib/os)) | |
(defpackage #:rclone-filter | |
(:use #:cl) | |
(:local-nicknames (#:a #:alexandria) | |
(#:s #:serapeum))) | |
(in-package #:rclone-filter) | |
;; configuration variables | |
(defparameter *follow-symlinks* nil) | |
(defparameter *generated-filter-filename* ".rclone-filter0") | |
(defparameter *sync-generated-filter-file* nil) | |
(defparameter *user-filter-filename* ".rclone-filter") | |
(defparameter *include-local-gitignore* nil) | |
(defparameter *pattern-output-stream* nil) | |
(defparameter *ignored-directories* nil) | |
(defun write-pattern (pattern) | |
"write translated pattern to output file" | |
(write-line pattern *pattern-output-stream*)) | |
(defparameter *dirs* nil | |
"A list of directory names in reverse order of their traversal") | |
(defmacro inside-dir (dir &body body) | |
"use this macro to maintain `*dir*' and `*ignored-directories*' as we traverse into and out of subdirectories" | |
`(let ((*dirs* (cons ,dir *dirs*)) | |
(*ignored-directories* (ignored-directories dir))) | |
,@body)) | |
(defun add-dir-ignore (dir) | |
"add a rule to directory ignore list | |
`dir' is a list of dirnames (relative to *dirs*), and can include wildcards | |
`:any' to match any number of dirs, and `:one' to match one directory" | |
(push dir *ignored-directories*)) | |
(defun ignored-directory? (dir) | |
"return true if the directory `dir' (relative to *dirs*) is ignored" | |
(loop for (d . rest) in *ignored-directories* | |
when (and (stringp d) | |
(string-equal d dir) | |
(null rest)) | |
return t)) | |
(defun ignored-directories (dir) | |
"new value for `*ignored-directories*' when we move into a directory `dir'" | |
(let ((rules ())) | |
(loop for rule in *ignored-directories* | |
for (d . rest) in rule do | |
(cond ((equal d :any) | |
(push rule rules) | |
(push rest rules)) | |
((equal d :one) | |
(push rest rules)) | |
((string-equal d dir) | |
(push rest rules)))) | |
rules)) | |
(defun path-from-root () | |
"pathname from starting directory upto current directory position" | |
(s:string-join (reverse *dirs*) "/")) | |
(defun trim-pattern (pattern) | |
"Trim all whitespace on the left and all non-escaped whitespace on the right" | |
(let ((pos (position #\\ pattern :from-end t))) | |
(if (and pos (< (+ pos 1) (length pattern)) | |
(every #'s:whitespacep (subseq pattern (+ pos 2)))) | |
(subseq pattern | |
(position-if-not #'s:whitespacep pattern) | |
(+ pos 2)) | |
(s:trim-whitespace pattern)))) | |
(defun relative-patternp (pattern) | |
"Return T for patterns like /a/b/ /a a/b/c/ and NIL for patterns like a/ b/ | |
see gitignore(5) man page | |
If there is a separator at the beginning or middle (or both) of the pattern, then the | |
pattern is relative to the directory level of the particular .gitignore file itself. Otherwise the | |
pattern may also match at any level below the .gitignore level. | |
Note: this function assumes that pattern is trimmed of whitespaces" | |
(loop for char across pattern | |
for i from 1 below (length pattern) ;; i.e. last char won't be processed | |
for escaped = nil | |
do | |
(when (and (char= char #\/) | |
(not escaped)) | |
(return t)))) | |
(defun translate-gitignore (pattern &key prefix) | |
"Translate gitignore `pattern' to `rclone' pattern, append `prefix' to it, then write it" | |
(let* ((pattern (trim-pattern pattern)) | |
(slash (unless (char= #\/ (char pattern 0)) "/"))) | |
(flet ((add (&rest strings) | |
(write-pattern (apply #'concatenate 'string strings)))) | |
(cond (*dirs* | |
(let ((dirpath (path-from-root))) | |
(add prefix "/" dirpath slash pattern) | |
(unless (relative-patternp pattern) | |
(add prefix "/" dirpath "/**" slash pattern)))) | |
(t | |
(add prefix slash pattern) | |
(unless (relative-patternp pattern) | |
(add prefix "/**" slash pattern))))))) | |
(defun add-dir-ignore-git (pattern) | |
(let* ((pattern (trim-pattern pattern)) | |
(path (uiop:split-string pattern :separator "/")) | |
(dir nil)) | |
(when (a:ends-with #\/ pattern) | |
(setf path (butlast path))) | |
(unless (relative-patternp pattern) | |
(setf dir (cons :any dir))) | |
(loop for x in path do | |
(cond ((string-equal x "**") | |
(setf dir (cons :any dir))) | |
((string-equal x "*") | |
(setf dir (cons :one dir))) | |
(t (setf dir (cons x dir))))) | |
(add-dir-ignore (reverse dir)))) | |
(defun process-gitignore-file (file) | |
(format t " ~a~%" file) | |
(flet ((process-line (line) | |
(cond ((every #'s:whitespacep line) ;; empty lines | |
nil) | |
((char= #\# (char line 0)) ;; comments | |
nil) | |
((char= #\! (char line 0)) ;; include | |
(translate-gitignore (subseq line 1) :prefix "+ ")) | |
(t ;; exclude | |
(translate-gitignore line :prefix "- ") | |
(add-dir-ignore-git line))))) | |
(map nil #'process-line (uiop:read-file-lines file)))) | |
(defun translate-rclone (line) | |
"translate line from an rclone filter to be used in another rclone filter file at higher up directory" | |
(flet ((add (&rest strings) | |
(write-pattern (apply #'concatenate 'string strings)))) | |
(if (not *dirs*) | |
(write-pattern line) ;; write as it is | |
(let* ((prefix (subseq line 0 2)) | |
(rule (s:trim-whitespace (subseq line 2))) | |
(dirpath (path-from-root))) | |
(cond ((char= (char rule 0) #\/) | |
;; rule matches at the top level directory of filter file | |
(add prefix "/" dirpath rule)) | |
(t ;; rule can match at the top level or at any inside level | |
(add prefix "/" dirpath "/" rule) | |
(add prefix "/" dirpath "/**/" rule))))))) | |
(defun add-dir-ignore-rclone (pattern) | |
(let* ((pattern (trim-pattern pattern))) | |
(unless (char= #\/ (char pattern 0)) | |
(format t "rclone pattern invalid: ~a~%Must start with a /" pattern) | |
(return-from add-dir-ignore-rclone nil)) | |
(when (or (a:ends-with #\/ pattern) | |
(a:ends-with-subseq "**" pattern) | |
(a:ends-with-subseq "*" pattern)) | |
(let ((path (butlast (uiop:split-string (subseq pattern 1) :separator "/"))) | |
(dir ())) | |
(loop for x in path do | |
(cond ((string-equal x "**") | |
(setf dir (cons :any dir))) | |
((string-equal x "*") | |
(setf dir (cons :one dir))) | |
(t (setf dir (cons x dir))))) | |
(add-dir-ignore (reverse dir)))))) | |
(defun process-rclone-file (file) | |
(format t " ~a~%" file) | |
(flet ((process-line (line) | |
(cond ((every #'s:whitespacep line) nil) | |
((char= #\# (char line 0)) nil) | |
((let ((type (char line 0))) | |
(and (> (length line) 2) | |
(or (equal type #\+) (equal type #\-)) | |
(char= (char line 1) #\Space))) | |
;; when valid filter rule | |
(translate-rclone line) | |
(if (equal (char line 0) #\-) | |
(add-dir-ignore-rclone (subseq line 2))))))) | |
(let* ((lines (uiop:read-file-lines file)) | |
;; to parse rules only after the `!' clearing rule (if it exists) | |
(clearing-rule-position (position-if #'(lambda (line) | |
(and (> (length line) 0) | |
(char= (char line 0) #\!) | |
(every #'s:whitespacep (subseq line 0)))) | |
lines | |
:from-end t))) | |
(map nil #'process-line (if clearing-rule-position | |
(subseq lines (1+ clearing-rule-position)) | |
lines))))) | |
(defun traverse-directories (path) | |
(flet ((recurse (dir) | |
(let ((dirname (car (last (pathname-directory dir))))) | |
(unless (or (equal dirname ".git") | |
(ignored-directory? dirname)) | |
(inside-dir dirname | |
(traverse-directories dir)))))) | |
;; process ignore files if they exist | |
(a:when-let ((file (and *include-local-gitignore* | |
(probe-file (merge-pathnames #p"./.git/info/exclude" path))))) | |
(process-gitignore-file file)) | |
(a:when-let ((file (probe-file (merge-pathnames #p".gitignore" path)))) | |
(process-gitignore-file file)) | |
(a:when-let ((file (probe-file (merge-pathnames *user-filter-filename* path)))) | |
(process-rclone-file file)) | |
;; move into subdirectories | |
(if *follow-symlinks* | |
(map nil #'recurse (uiop:subdirectories path)) | |
(map nil #'(lambda (dir) | |
(unless (eql (iolib/os:file-kind dir) :symbolic-link) | |
(recurse dir))) | |
(uiop:subdirectories path))))) | |
(defun main0 (root) | |
(let ((filter-file (make-pathname :name *generated-filter-filename* :defaults root))) | |
(format t "Generating filter file for ~a at ~a ~%" root filter-file) | |
(with-open-file (*pattern-output-stream* filter-file | |
:direction :output | |
:if-exists :supersede) | |
(traverse-directories root) | |
(unless *sync-generated-filter-file* | |
(write-pattern (format nil "- /~a" *generated-filter-filename*)))))) | |
(defun main () | |
(let ((args (uiop:command-line-arguments))) | |
(flet ((arg? (string) | |
(find string args :test #'string-equal)) | |
(done () | |
(return-from main))) | |
(when (arg? "--help") | |
(format t "Create rclone filter file for given directory by parsing | |
filter files recursively inside the given directory. | |
Filter files include: .rclone-filter and .gitignore | |
USAGE: rclone-filter.lisp [flags] directory | |
flags include: | |
--help Displays this help message | |
--follow-symlink | |
-L Similar to rclone's -L flag, tells whether to follow directory symlinks | |
while searching for ignore files | |
--local-gitignore | |
Whether to parse .git/info/exclude file or not | |
--user-filter-file \"filename\" | |
Supply filter file name maintained manually (Default: .rclone-filter) | |
Rules from this file are parsed and added to generated file | |
-O Filter file to generate (Default: .rclone-filter0) | |
--sync-filter-file | |
By default a rule to ignore itself is added in the generated filter file | |
When this flag is provided, don't add that rule. | |
Note: No rule to ignore user filter files is added in anycase. | |
After generating the filter file, potential usecase is such as: | |
rclone path/of/source path/of/dest --filter-from path/of/source/.rclone-filter0 | |
") | |
(done)) | |
(when (or (arg? "--follow-symblink") | |
(arg? "-L")) | |
(setf *follow-symlinks* t)) | |
(when (arg? "--local-gitignore") | |
(setf *include-local-gitignore* t)) | |
(when (arg? "--user-filter-file") | |
(let ((p (position "--user-filter-file" args :test #'string-equal))) | |
(when (< (length args) (+ p 2)) | |
(format t "ERROR: Supply file name to --user-filter-file") | |
(done)) | |
(setf *user-filter-filename* (nth (1+ p) args)))) | |
(when (or (= (length args) 0) | |
(and (arg? "--user-filter-file") | |
(< (length args) 3))) | |
(format t "Invalid args. See --help") | |
(done)) | |
(when (arg? "--sync-filter-file") | |
(setf *sync-generated-filter-file* t)) | |
(let ((truename (ignore-errors (truename (car (last args)))))) | |
(unless truename | |
(format t "Path doesn't exist ~a" (car (last args))) | |
(done)) | |
(main0 (truename (car (last args)))))))) | |
;;; testing | |
(defun test% (exclude-file rclone-lsf) | |
(let ((included (uiop:read-file-lines rclone-lsf)) | |
(excluded (uiop:read-file-lines exclude-file))) | |
(map 'nil #'print (intersection included excluded :test #'string-equal)))) | |
(defun test (root) | |
(let ((excludelist (make-pathname :name "excludelist" :defaults root)) | |
(filter-file (make-pathname :name *generated-filter-filename* :defaults root)) | |
(rclone-lsf (make-pathname :name "rclone-lsf" :defaults root))) | |
(flet ((cmd (format-string &rest args) | |
(let ((str (apply #'format nil format-string args))) | |
(format t "~a~%" str) | |
str))) | |
(format t "generating filter list~%") | |
(let ((*include-local-gitignore* t)) | |
(main0 root)) | |
(format t "generating excludelist~%") | |
(uiop:run-program (cmd "/home/bpanthi/Development/python/rclone-exclude.py ~a ~a --no-exclude-self" | |
root excludelist)) | |
(format t "generating inclusion list~%") | |
(uiop:run-program (cmd "rclone lsf ~a -R ~a --filter-from ~a > ~a" | |
(if *follow-symlinks* "-L" "") | |
root | |
filter-file | |
(make-pathname :name "rclone-lsf" :defaults root)) | |
:ignore-error-status t | |
:error-output *standard-output*) | |
(format t "~&(test% #p\"~a\" #p\"~a\")~%" excludelist rclone-lsf) | |
(test% excludelist rclone-lsf)))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment