Startseite
Astronomie
Gipfelbuch
Photos
Whisky
Whiskyrechner
Passwortgenerator
Simpsons
Code
xkcd

Kleingedrucktes
Kontakt
#!/usr/bin/python2

import MySQLdb as mdb
import hashlib
import os
import threading
import time
from Queue import Queue

# How many worker threads hash files in parallel
concurrent_threads = 4

# Directory trees that are scanned recursively
check_dirs = [
    '/etc',
    '/var/lib',
    '/var/lib64',
    '/opt',
    '/srv',
    '/usr/bin',
    '/usr/lib32',
    '/usr/lib',
]

# Directory prefixes that are left out of the scan. This has to stay a tuple:
# it is handed to str.startswith(), which accepts a tuple of prefixes.
excludes = (
    '/var/lib/mysql',
    '/var/lib/ntp',
    '/var/lib/dhcpcd',
    '/var/lib/pacman/local',
)

# MySQL database that stores the hashes, and the account used to reach it
db_name = 'filehashes'
db_user = 'root'
db_pwd = 'mysecretpassword'

# Table with one row per file. It must provide the columns "filename" and
# "hash"; new rows are inserted positionally as (filename, hash).
table_name = 'file_hash'

# Work queue that hands file names to the worker threads
fname_queue = Queue()

# Every file name seen during the scan; used afterwards to find database
# entries whose file has vanished
found_files = []

class myThread (threading.Thread):
    """
    Worker thread that checks the file names it takes from a queue.
    Every instance opens its own MySQL connection when it is created;
    starting the thread invokes "run".
    """
    def __init__(self,q):
        super(myThread, self).__init__()
        # Queue of file names this worker consumes
        self.q = q
        # Connection owned by this worker only
        self.con = mdb.connect('localhost', db_user, db_pwd, db_name)

    def run(self):
        # Hand the queue and the connection to the processing loop
        process_file(self.q,self.con)


def mysql_command(command,con,params=None):
    """
    Execute a mysql command and return the cursor that holds its result.

    command (string): the SQL statement to execute. Values should be written
        as "%s" placeholders and passed separately in "params".
    con (database object): the open connection to run the statement on
    params (sequence or None): values bound to the placeholders by the
        database driver, which takes care of quoting them. With None the
        statement is executed exactly as written.
    """
    # NOTE(review): "with con:" is what ends the transaction here. With the
    # MySQLdb 1.2 series this commits on success and rolls back on error --
    # confirm for the installed driver version.
    with con:
        cur = con.cursor()
        cur.execute("use " + db_name)
        if params is None:
            cur.execute(command)
        else:
            cur.execute(command, params)
        return cur



def add_values(filenamhash,con):
    """
    Add hash-filename pairs to the database.

    filenamhash (dict): maps each filename to its hash
    con (database object): the open connection to insert through
    """
    tot = len(filenamhash)
    # Rows are inserted positionally: filename first, hash second
    insert = "INSERT INTO " + table_name + " VALUES(%s,%s)"
    for i, (filename, filehash) in enumerate(filenamhash.items(), 1):
        print('('+str(i)+'/'+str(tot)+') Adding '+filename)
        # Bound parameters keep file names containing quotes or backslashes
        # from breaking (or altering) the statement
        mysql_command(insert, con, (filename, filehash))



def update_value(filename,Hash,con):
    """
    Replace hash of a given filename by a new one in the database.

    filename (string): the file whose entry is updated
    Hash (string): the new hash to store
    con (database object): the open connection to update through
    """
    update = "UPDATE " + table_name + " set hash=%s where binary filename=%s"
    mysql_command(update, con, (Hash, filename))


def get_hash(filename,con):
    """
    Returns the hash of a given filename from the database.
    An empty string is returned if the database has no entry for the file.
    """
    Hash = ""
    select = "SELECT hash from " + table_name + " where binary filename=%s"
    cur = mysql_command(select, con, (filename,))
    # If several rows match, the last one wins
    for value in cur:
        Hash = value[0]

    return Hash



def compare_db_with_files(files):
    """
    Walk through every database entry and check if the file is still there on the system.
    Entries whose filename is not contained in "files" are deleted.

    files (iterable of strings): the filenames that currently exist
    """
    # A set makes each membership test O(1); with a list every database row
    # would trigger a scan over all known files
    known_files = set(files)
    con = mdb.connect('localhost', db_user, db_pwd, db_name)
    try:
        # Only the filename is needed, so do not depend on the column order
        cur = mysql_command("SELECT filename from " + table_name,con)

        for (fname,) in cur:
            if fname not in known_files:
                delete_entry(fname,con)
    finally:
        # This connection is private to the function, so release it here
        con.close()


def delete_entry(fname,con):
    """
    Delete entry with given filename from database.

    fname (string): the filename whose entry is removed
    con (database object): the open connection to delete through
    """
    print("Deleting "+fname)
    mysql_command("DELETE from "+ table_name +" where binary filename=%s", con, (fname,))


def get_md5sum(filename):
    """
    Returns actual md5sum of a file as a hex string.
    None is returned if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    try:
        # Hash in fixed-size chunks so that a large file is never held in
        # memory as a whole; "with" closes the handle even if a read fails
        with open(filename, 'rb') as handle:
            for chunk in iter(lambda: handle.read(65536), b''):
                digest.update(chunk)
    except (IOError, OSError):
        # Unreadable path (missing, no permission, a directory, ...):
        # report it as None so that the caller can skip the file
        return None
    return digest.hexdigest()


def process_file(q,con):
    """
    This function is used by the threads to check if the actual hash of a filename is equal to the hash in the database.
    Unknown files are added to the database, changed files are reported and their stored hash is updated.
    The loop never returns; it keeps taking filenames from the queue.

    q (Queue): the queue of filenames to work off
    con (database object): the connection used for all lookups and updates
    """
    while True:
        # Get a filename from the queue
        fname = q.get()
        try:
            # Get the actual hash; None means the file could not be read
            current_hash = get_md5sum(fname)
            if current_hash is not None:
                # Get hash which is stored in the database
                db_hash = get_hash(fname,con)
                if db_hash == "":
                    # Unknown file: store the hash computed above instead of
                    # reading and hashing the file a second time
                    add_values({fname:current_hash},con)
                elif current_hash != db_hash:
                    print("Mismatch for file "+fname+" db: "+db_hash+" file: "+current_hash)
                    update_value(fname,current_hash,con)
        except Exception as exc:
            # A failure on one file must not kill the worker: report it and
            # carry on with the next filename
            print("Error while processing "+fname+": "+str(exc))
        finally:
            # Tell the queue that this task has been completed. This has to
            # happen on every path, otherwise q.join() waits forever.
            q.task_done()
    




# Start threads
for _ in range(concurrent_threads):
    worker = myThread(fname_queue)
    # Daemon threads do not keep the program alive once the main thread ends;
    # the workers loop forever, so this is what lets the script exit
    worker.daemon = True
    worker.start()


# Find all files in the given directories and add the names to the queue
for check_dir in check_dirs:
    for root, dirs, files in os.walk(check_dir):
        if root.startswith(excludes):
            # Excluded directory: skip its files and do not descend into it.
            # Every directory below it starts with the same prefix and would
            # be skipped as well, so pruning only saves the pointless walk.
            del dirs[:]
            continue
        for name in files:
            fname = os.path.join(root,name)
            # Only regular files are hashed; symbolic links are left out
            if os.path.isfile(fname) and not os.path.islink(fname):
                found_files.append(fname)
                fname_queue.put(fname)

# Wait for the queue to finish
fname_queue.join()



# Walk through database entries
print("Look for vanished files")
compare_db_with_files(found_files)