#!/usr/bin/python2
"""Walk a set of directories, store the MD5 hash of every regular file in a
MySQL table, and report/refresh entries whose on-disk hash has changed.
Database entries for files that no longer exist are deleted afterwards."""

import MySQLdb as mdb
import hashlib
import os
import threading
import time
from Queue import Queue

# Number of worker threads
concurrent_threads = 4

# These directories will be checked
check_dirs = ['/etc', '/var/lib', '/var/lib64', '/opt', '/srv',
              '/usr/bin', '/usr/lib32', '/usr/lib']

# These directories will be excluded
excludes = ('/var/lib/mysql', '/var/lib/ntp', '/var/lib/dhcpcd',
            '/var/lib/pacman/local')

# Name of database
db_name = 'filehashes'
# DB user
db_user = 'root'
# DB password
db_pwd = 'mysecretpassword'
# Table name. Note: This table must have the columns "hash" and "filename"
table_name = 'file_hash'

# Queue of filenames waiting to be hashed and checked
fname_queue = Queue()
# Every file seen during the walk; used to prune vanished files from the DB
found_files = []


class myThread(threading.Thread):
    """Worker thread: consumes filenames from the queue and verifies their
    hashes against the database.

    Each thread opens its own connection because MySQLdb connections must
    not be shared between threads.
    """

    def __init__(self, q):
        threading.Thread.__init__(self)
        # Private DB connection for this worker
        self.con = mdb.connect('localhost', db_user, db_pwd, db_name)
        # "q" is the queue of filenames to work off
        self.q = q

    def run(self):
        process_file(self.q, self.con)


def mysql_command(command, con, params=None):
    """Execute a single SQL statement and return the cursor.

    command (str): SQL text, with %s placeholders for any dynamic values
    con: open MySQLdb connection; used as a context manager so the
         statement is committed on success and rolled back on error
    params (tuple or None): values bound to the placeholders.  Binding via
         the driver (instead of concatenating strings into the SQL) is what
         protects against SQL injection from hostile filenames.
    """
    with con:
        cur = con.cursor()
        # The connection was opened against db_name already, so no explicit
        # "USE" statement is required here.
        cur.execute(command, params)
    return cur


def add_values(filenamhash, con):
    """Insert every (filename, hash) pair of the dict *filenamhash*
    into the table, printing a progress counter as it goes."""
    tot = len(filenamhash)
    for i, filename in enumerate(filenamhash, 1):
        print('(' + str(i) + '/' + str(tot) + ') Adding ' + filename)
        mysql_command("INSERT INTO " + table_name + " VALUES(%s, %s)",
                      con, (filename, filenamhash[filename]))


def update_value(filename, Hash, con):
    """Replace the stored hash of *filename* with *Hash*."""
    mysql_command("UPDATE " + table_name +
                  " SET hash=%s WHERE BINARY filename=%s",
                  con, (Hash, filename))


def get_hash(filename, con):
    """Return the hash stored for *filename*, or "" when no entry exists.
    BINARY forces a case-sensitive filename comparison."""
    cur = mysql_command("SELECT hash FROM " + table_name +
                        " WHERE BINARY filename=%s", con, (filename,))
    Hash = ""
    for value in cur:
        Hash = value[0]
    return Hash


def compare_db_with_files(files):
    """Walk through every database entry and delete those whose file is no
    longer present in *files* (the list collected by the directory walk)."""
    con = mdb.connect('localhost', db_user, db_pwd, db_name)
    cur = mysql_command("SELECT * FROM " + table_name, con)
    # A set gives O(1) membership tests instead of an O(n) list scan
    # for every single database row.
    present = set(files)
    for fname, Hash in cur:
        if fname not in present:
            delete_entry(fname, con)


def delete_entry(fname, con):
    """Delete the entry with the given filename from the database."""
    print("Deleting " + fname)
    mysql_command("DELETE FROM " + table_name +
                  " WHERE BINARY filename=%s", con, (fname,))


def get_md5sum(filename):
    """Return the MD5 hex digest of *filename*, or None when the file
    cannot be read (permission denied, vanished mid-walk, ...).

    The file is hashed in 64 KiB chunks so arbitrarily large files never
    have to fit in memory, and the handle is closed deterministically.
    """
    md5 = hashlib.md5()
    try:
        with open(filename, 'rb') as f:
            for chunk in iter(lambda: f.read(65536), b''):
                md5.update(chunk)
    except (IOError, OSError):
        # Only I/O problems are expected and tolerated here; anything else
        # would be a bug and should propagate.
        return None
    return md5.hexdigest()


def process_file(q, con):
    """Worker loop: take filenames off *q*, compare the file's current hash
    with the one in the database, add missing entries and update changed
    ones.  Runs forever; the threads are daemonic and die with the main
    program once fname_queue.join() returns."""
    while True:
        # Get a filename from the queue
        fname = q.get()
        # Hash the file as it is on disk right now
        current_hash = get_md5sum(fname)
        if current_hash is not None:
            # Hash that is stored in the database ("" if not present)
            db_hash = get_hash(fname, con)
            if db_hash == "":
                # New file: reuse the hash we already computed instead of
                # reading and hashing the file a second time.
                add_values({fname: current_hash}, con)
            elif current_hash != db_hash:
                print("Mismatch for file " + fname +
                      " db: " + db_hash + " file: " + current_hash)
                update_value(fname, current_hash, con)
        # Tell the queue that this task has been completed
        q.task_done()


def _is_excluded(root):
    """True when *root* is an excluded directory or lies below one.
    Matching on the path component boundary ('/') avoids accidentally
    skipping siblings such as /var/lib/mysql2."""
    for ex in excludes:
        if root == ex or root.startswith(ex + '/'):
            return True
    return False


# Start the worker threads
for i in range(concurrent_threads):
    worker = myThread(fname_queue)
    worker.setDaemon(True)
    worker.start()

# Find all files in the given directories and add their names to the queue
for check_dir in check_dirs:
    for root, dirs, files in os.walk(check_dir):
        # The exclusion only depends on the directory, so test it once per
        # directory instead of once per file.
        if _is_excluded(root):
            continue
        for name in files:
            fname = os.path.join(root, name)
            # Only hash regular, non-symlinked files
            if os.path.isfile(fname) and not os.path.islink(fname):
                found_files.append(fname)
                fname_queue.put(fname)

# Wait for the queue to finish
fname_queue.join()

# Walk through database entries and drop those for vanished files
print("Look for vanished files")
compare_db_with_files(found_files)