www

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

hash-files.py (3573B)


      1 #!/usr/bin/env python3
      2 
      3 import os
      4 import sys
      5 import subprocess
      6 
      7 def debug(s):
      8   #print(s, file=sys.stderr, flush=True)
      9   pass
     10 
     11 
     12 def hashFile(filename):
     13   result = subprocess.check_output(['sha256sum', '--binary', '--zero', filename])[0:64]
     14   debug("hashFile("+filename+") = "+str(result))
     15   return result
     16 
     17 def hash1(bytes_):
     18   result = subprocess.check_output(['sha256sum', '--binary', '--zero'], input=bytes_)[0:64]
     19   debug("hash1("+str(bytes_)+") = "+str(result))
     20   return result
     21 
     22 #
     23 # TODO: use this to get the hashes and names for all roots of the DAG (commits that are reachable only through one (or several) direct branch names, but not transitively as ancestors of other commits)
     24 #
# Shell script template, filled in with str.format(): {HEAD} and {FETCH_HEAD}
# are placeholders, and every literal brace of the awk program is doubled
# ({{ and }}) so that format() leaves it alone. Because this is a normal
# (non-raw) string, `\\n` reaches the shell as `\n`, and a backslash at the end
# of a line joins that line with the next one before the shell sees it.
#
# What the pipeline does:
#   1. Inner group, first command: print the parent hashes of every commit
#      reachable from the given refs and --all, one per line, de-duplicated.
#      `sed -e 'p;p'` then emits every such hash three times.
#   2. Inner group, second command: print the hash each ref points at, once.
#   3. `sort | uniq -u` keeps only the lines that occur exactly once, i.e. the
#      hashes that are pointed at by a ref and are not a parent of any commit.
#   4. Append "<hash> <ref>" lines for {HEAD}, {FETCH_HEAD} and every ref
#      listed by `git for-each-ref`.
#   5. After sorting, a bare hash line comes directly before the
#      "<hash> <ref>" lines sharing that hash. The awk program remembers the
#      last bare 40-character line and prints only the "<hash> <ref>" lines
#      whose first 40 characters match it (so it relies on 40-character
#      object names).
#   6. The surviving lines are sorted by ref name (`sort -k 2`).
git_command='''
  (
    (
      git log --format=%P --all {HEAD} {FETCH_HEAD} | tr ' ' \\n | grep -v '^$' | LC_ALL=C sort -u | sed -e 'p;p';
      git rev-parse {HEAD} {FETCH_HEAD} --all | LC_ALL=C sort -u
    ) | LC_ALL=C sort | uniq -u;
    for ref in {HEAD} {FETCH_HEAD}; do echo "$(git rev-parse $ref) $ref"; done; git for-each-ref --format='%(objectname) %(refname)'
  ) | LC_ALL=C sort \
    | awk 'BEGIN {{ h="" }} {{ if (length($0) == 40) {{ h=$0 }} else {{ if (substr($0,1,40) == h) print $0 }} }}' \
    | LC_ALL=C sort -k 2
'''
     36 
     37 def ref_exists(path, ref):
     38   try:
     39     subprocess.check_output("git rev-parse --verify "+ref+" 2>/dev/null", cwd=path, shell=True)
     40     return True
     41   except subprocess.CalledProcessError:
     42     return False
     43 
     44 def hashGit(path):
     45   FETCH_HEAD = "FETCH_HEAD" if ref_exists(path, "FETCH_HEAD") else ''
     46   HEAD       =       "HEAD" if ref_exists(path,       "HEAD") else ''
     47   result = subprocess.check_output(['sh', '-c', git_command.format(HEAD=HEAD, FETCH_HEAD=FETCH_HEAD)], cwd=path)
     48   debug("hashGit("+path+") = "+str(result))
     49   return result
     50 
     51 def hashSqlite3(path):
     52   result= subprocess.check_output(['sh', '-c', 'sqlite3 "$1" .dump | LC_ALL=C sort | sha256sum --binary --zero', '--', os.path.abspath(path)])
     53   debug("hashSqlite3("+path+") = "+str(result))
     54   return result
     55 
     56 def ignore_exitcode(cmd, **kwargs):
     57   try:
     58      return subprocess.check_output(cmd, **kwargs)
     59   except subprocess.CalledProcessError:
     60     return ''
     61 
     62 def is_git(x):
     63   return os.path.isdir(x) \
     64          and (ignore_exitcode("git rev-parse --is-inside-git-dir 2>/dev/null",   cwd=x, shell=True).strip() == b'true' or
     65               ignore_exitcode("git rev-parse --is-inside-work-tree 2>/dev/null", cwd=x, shell=True).strip() == b'true')
     66          # TODO: if a file which is inside a larger git dir is passed on the CLI, this still returns True :-(
     67 
     68 def recur(depth, x):
     69   # initial list of paths
     70   if isinstance(x, list):
     71     debug("ROOT " + str(depth) + ' [' + ', '.join(x) + ']')
     72     return b'root\0' + b''.join(recur(depth + 1, os.path.abspath(path)) + b'  ' + path.encode('utf-8') + b'\0' for path in sorted(x))
     73   # GIT repo
     74   elif is_git(x):
     75     debug("GIT DIR " + str(depth) + ' ' + x)
     76     return hash1(b'git-versioned folder\0' + hashGit(x))
     77   # directory
     78   elif os.path.isdir(x):
     79     debug("DIR " + str(depth) + ' ' + x)
     80     return hash1(b'directory\0' + b''.join(recur(depth + 1, os.path.join(x, entry)) + b'  ' + entry.encode('utf-8') + b'\0' for entry in sorted(os.listdir(x))))
     81   elif b'SQLite 3.x database' in subprocess.check_output(["file", x]):
     82     debug("SQLITE3 " + str(depth) + ' ' + x)
     83     return hashSqlite3(x)
     84   # Just a file
     85   elif os.path.isfile(x):
     86     debug("PLAIN FILE " + str(depth) + ' ' + x)
     87     return hashFile(x)
     88   else:
     89     sys.exit("unknown file type for %s" % os.path.abspath(x))
     90 
# Entry point: treat every command-line argument as a path, hash the combined
# record produced by recur() once more, and print that digest as text.
print(hash1(recur(0, sys.argv[1:])).decode('utf-8'), flush=True)