commit 8715c69e89d792411ad6141087718aa2fa55afb0 Author: derped Date: Sat Mar 21 08:20:49 2020 +0100 Init: Basic project structure and functionality. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d68d4d5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +*.pyc +.mypy_cache +__pycache__ +result \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a7d66a7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2019-2020 Kevin Baensch + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..d18673c --- /dev/null +++ b/README.md @@ -0,0 +1,47 @@ +# fck - A simple checksum utility that just works +File checker (fck) is a Python wrapper for various checksum functions (though right now it only supports CRC32). +Its goal is to make verifying large amounts of files both easier and faster. + +## Features +- Process/check multiple files at once. 
+- Automatically find expected checksums. +- Quickly find faulty files. +- (Not yet) easily extendable + +## Syntax +``` +fck --help +usage: fck [-h] [-b] [-p PROCESSES] [-c CHECKSFV] [files [files ...]] + +Calculate CRC32 of files + +positional arguments: + files files and folders to process + +optional arguments: + -h, --help show this help message and exit + -b, --bigfiles parse files that exceed your memory limit + -p PROCESSES, --processes PROCESSES + -c CHECKSFV, --checksfv CHECKSFV +``` + +## Dependencies +- Python >= 3.8 +- zlib // used to calculate CRC32 sums + +## Roadmap +### 0.1 +- [ ] Context-based selection of appropriate checksum type. +- [ ] Define and document project/class structure. +- [ ] Support reading/writing '.sfv' files. +- [ ] Add Tests. +### 0.2 +- [ ] Dehardcode various CRC32 specific functionalities. +- [ ] Implement proper logging. +- [ ] Implement other common checksum types. +- [ ] Write comprehensive documentation. +### Future +- [ ] More and better error (and memory) handling. +- [ ] Better logging/output control. +- [ ] Add an optional GUI. 
+- [ ] Package application for Windows/Mac/Linux (maybe)
\ No newline at end of file
diff --git a/default.nix b/default.nix
new file mode 100644
index 0000000..4d115c3
--- /dev/null
+++ b/default.nix
@@ -0,0 +1,19 @@
+{ buildPythonApplication, zlib, lib }:
+
+with lib;
+
+buildPythonApplication {
+  name = "fck";
+  version = "0.1";
+
+  src = cleanSource ./.;
+
+  buildInputs = [ zlib ];
+
+  meta = {
+    homepage = "https://git.ophanim.de/derped/fck";
+    description = "A small checksum utility that just works.";
+    license = licenses.mit;
+    platforms = platforms.all;
+  };
+}
diff --git a/deprecated/crc32sum_threaded.py b/deprecated/crc32sum_threaded.py
new file mode 100755
index 0000000..17a9ab4
--- /dev/null
+++ b/deprecated/crc32sum_threaded.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+
+# This software is licensed under the MIT License:
+# Copyright (c) 2019-2020 Kevin Baensch
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+# DISCLAIMER
+# This file will not receive any updates.
+# This is my initial implementation of a crc32 checker.
+# The code quality is kind of mediocre (as it was only intended for personal use)
+# Still works perfectly fine and has the advantage of being compact (and not as bloated as the OO implementation)
+
+
+# Cool things to come... eventually... maybe (when I'm motivated):
+# - add support for sfv files
+# - write checksums to files
+# - better error handling (symlinks etc)
+# - should under no circumstances crash (check for available memory)
+# - lots and lots of cleanup
+# - documentation
+# - verify sfv files: re.fullmatch(r"^[0-9a-fA-F]$", s or "") is not None
+# - contextmanagers are a cool way to handle exceptions https://www.python.org/dev/peps/pep-0343/
+# - if possible move file handling into separate function (or if really necessary a class)
+# - unit tests
+# - maybe use a logging library
+# - turn this into a proper library -> move stuff into separate files
+
+"""
++---------------------+
+| CRC32 sum generator |
++---------------------+
+"""
+
+import argparse
+from multiprocessing import Pool
+from os import listdir, path
+from typing import Generator, List, Tuple, Optional
+from zlib import crc32
+
+
+def file_search(pathlist: List[str]) -> Generator[Tuple[(Optional[str], str)], None, None]:
+    """
+    Generate file paths from given list of Paths.
+
+    +------------+
+    | Parameters |
+    +------------+
+    | pathlist: List[str]
+    |     List of files and directories.
+
+    +--------+
+    | Yields |
+    +--------+
+    | file: Tuple[(Optional[str], str)]
+    |     file is a Tuple containing:
+    |     - a crc32 sum (None if unknown)
+    |     - a files path string
+    """
+    for fpath in pathlist:
+        if path.isfile(fpath):
+            if len(fpath) > 4 and fpath[-4:] == ".sfv":
+                yield from sfv_read(fpath)
+            else:
+                yield (None, fpath)
+            continue
+        if path.isdir(fpath):
+            yield from file_search([path.join(fpath, x) for x in listdir(fpath)])
+            continue
+        print(f"[WARN]: No such file or directory: {fpath}")
+
+
+def sfv_read(filename: str) -> Generator[Tuple[(Optional[str], str)], None, None]:
+    """
+    Read sfv file and yield a (checksum, path) tuple per entry.
+    """
+    try:
+        with open(filename, 'r') as file:
+            yield from ((x.split()[-1], ' '.join(x.split()[:-1]))
+                        for x in file.read().split('\n')
+                        if x.strip() and x[0] != ';')
+    except UnicodeDecodeError:
+        print(f"[ERR]: {filename} is not a text file.")
+    except FileNotFoundError:
+        print(f"[WARN]: No such file or directory: {filename}")
+
+
+# This was never fully implemented before the OO rewrite - so it won't work.
+def sfv_write(checked_files: List[Tuple[str, str, bool]], filename: str) -> None:
+    """
+    Write sfv file.
+    """
+    try:
+        with open(filename, 'w') as file:
+            checked_files.sort()
+            if any([not x[2] for x in checked_files]):
+                print(f"[WARN]: {filename} will contain unverified checksums.")
+#            [file.write(f"{str(x[0])}\t{str(x[1])}") for x in checked_files]
+            # Placeholder
+            file.write('\n'.join([x for x in ["hello" "world"]]))
+    except FileExistsError:
+        pass
+
+
+def checkmark(check: Optional[bool] = None) -> str:
+    """
+    Takes optional bool and returns colored string.
+    """
+    return {
+        True: '\033[92m✓\033[0m',
+        False: '\033[91m❌\033[0m',
+        None: '\033[90m?\033[0m'
+    }[check]
+
+
+def file_check(filename: Tuple[Optional[str], str],
+               largefile: bool = False) -> Tuple[Tuple[str, str], bool]:
+    """
+    Check given file and return checked file.
+    """
+    cksum = ""
+    try:
+        with open(filename[1], 'rb') as file:
+            if not largefile:
+                cksum = str(hex(crc32(file.read())))[2:].upper()
+            else:
+                lastcksum = 0
+                for chunk in iter(lambda: file.read(1 << 20), b''):
+                    lastcksum = crc32(chunk, lastcksum)
+                cksum = str(hex(lastcksum))[2:].upper()
+    except FileNotFoundError:
+        print(f"[WARN]: No such file or directory: {filename[1]}")
+    if len(cksum) < 8:
+        cksum = ''.join([((8 - len(cksum)) * "0"), cksum])
+    check = ((cksum in filename[1].upper() and filename[0] is None) ^
+             (filename[0] is not None and cksum == filename[0].upper()))
+    print(f"{checkmark(check)} \t {cksum} \t {filename[1]}")
+    return ((cksum, filename[1]), check)
+
+
+def main() -> None:
+    """
+    Main function, should only run when program gets directly invoked.
+    """
+    parser = argparse.ArgumentParser(description="Calculate CRC32 of files")
+    parser.add_argument("-b", "--bigfiles",
+                        help="parse files that exceed your memory limit",
+                        action="store_true")
+    parser.add_argument("-p", "--processes",
+                        help="",
+                        default=2,
+                        type=int)
+    parser.add_argument("-c", "--checksfv",
+                        help="",
+                        default="",
+                        type=str)
+    parser.add_argument("files",
+                        help="files and folders to process",
+                        nargs='*',
+                        default=[])
+    args = parser.parse_args()
+
+    file_list = file_search(args.files)
+    ppool = Pool(processes=args.processes)
+    file_list_checked = ppool.starmap(file_check, [(x, args.bigfiles)
+                                                   for x in list(file_list)])
+    ppool.close()
+    print(f"[{len([x for x in file_list_checked if x[1]])}/{len(file_list_checked)}] Files passed")
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("Exception: KeyboardInterrupt")
diff --git a/fck/__init__.py b/fck/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/fck/checker.py b/fck/checker.py
new file mode 100644
index 0000000..74a5590
--- /dev/null
+++ b/fck/checker.py
@@ -0,0 +1,34 @@
+from typing import Generator, List, Tuple, Optional
+from .file import FILE
+import re
+
+
+def checkmark(value: Optional[bool] = None) -> str:
+    """
+    Takes optional bool and returns colored string.
+    """
+    return {
+        True: '\033[92m✓\033[0m',
+        False: '\033[91m❌\033[0m',
+        None: '\033[33m?\033[0m'
+    }[value]
+
+
+def check(f: FILE, largefile: bool = False) -> Optional[bool]:
+    """
+    Calculate the checksum of f, print the result and return f.verify().
+    With largefile set the file is read in 1 MiB chunks instead of at once.
+    """
+    f.csum.reset()
+    try:
+        with open(f.fpath, 'rb') as file:
+            if not largefile:
+                f.csum.gensum(file.read())
+            else:
+                # Fixed size chunks: a binary file may not contain any newline.
+                while chunk := file.read(1 << 20):
+                    f.csum.gensum(chunk)
+    except FileNotFoundError:
+        print(f"[WARN]: No such file or directory: {f.fpath}")
+    print(f"{checkmark(f.verify())} \t {f.csum} \t {f.esum} \t {f.fname}")
+    return f.verify()
diff --git a/fck/cktype/__init__.py b/fck/cktype/__init__.py
new file mode 100644
index 0000000..807210d
--- /dev/null
+++ b/fck/cktype/__init__.py
@@ -0,0 +1,18 @@
+from re import search
+from typing import Optional
+from .crc32 import CRC32
+
+CKTYPES = [
+    (CRC32)
+]
+
+def resolve(fname: str, esum: Optional[str] = None, cstype: Optional[str] = None):
+    """
+    Checks fname input for checksum pattern and uses the first match as esum.
+    cstype is accepted for future use but is currently ignored.
+    The first checksum type in CKTYPES (CRC32) is always returned.
+    """
+    if esum is None and (match := CKTYPES[0].REGEX.search(fname)):
+        esum = match.group(0)
+
+    return CKTYPES[0](), esum
diff --git a/fck/cktype/crc32.py b/fck/cktype/crc32.py
new file mode 100644
index 0000000..e69bf36
--- /dev/null
+++ b/fck/cktype/crc32.py
@@ -0,0 +1,44 @@
+from typing import Generator, List, Tuple, Optional
+from re import compile, Pattern
+from zlib import crc32
+
+
+class CRC32(object):
+    """
+    Incremental CRC32 checksum (zlib.crc32) shown as 8 upper case hex digits.
+    """
+    NAME: str = "CRC32"
+    EXT: List[str] = [".sfv"]
+    SYNTAX: List[Pattern] = [
+        compile(r'^;*$'),
+        compile(r'^.* [0-9a-fA-F]{8}$')
+    ]
+    REGEX: Pattern = compile(r'[0-9a-fA-F]{8}')
+
+    def __init__(self):
+        self.cksum: int = 0
+
+    def gensum(self, data: bytes):
+        """
+        Feed data into the running checksum.
+        """
+        self.cksum = crc32(data, self.cksum)
+
+    def reset(self):
+        """
+        Set the running checksum back to its initial value.
+        """
+        self.cksum = 0
+
+    def __repr__(self) -> str:
+        return f"{self.cksum:08X}"
+
+    def __eq__(self, other) -> bool:
+        """
+        Compare with another CRC32 or (case insensitively) with a hex string.
+        """
+        if isinstance(other, CRC32):
+            return self.cksum == other.cksum
+        if isinstance(other, str):
+            return self.__repr__() == other.upper()
+        return NotImplemented
diff --git a/fck/file.py b/fck/file.py
new file mode 100644
index 0000000..812252c
--- /dev/null
+++ b/fck/file.py
@@ -0,0 +1,26 @@
+from typing import Optional
+from os import path
+from . import cktype
+
+
+class FILE:
+    """
+    A file to check together with its calculated and expected checksum.
+    """
+    def __init__(self, fpath: str, esum: Optional[str] = None):
+        self.fpath = fpath
+        self.fname = path.basename(fpath)
+        self.csum, self.esum = cktype.resolve(self.fname, esum)
+
+    def __repr__(self):
+        return self.fname
+
+    def verify(self) -> Optional[bool]:
+        """
+        Return None if no expected checksum is known, else whether the
+        calculated checksum matches it (compared case insensitively).
+        """
+        if self.esum is None:
+            return None
+        # Compare via the checksum type's __eq__ so "deadbeef" == "DEADBEEF".
+        return self.csum == self.esum
diff --git a/fck/fileutils.py b/fck/fileutils.py
new file mode 100644
index 0000000..ed88883
--- /dev/null
+++ b/fck/fileutils.py
@@ -0,0 +1,65 @@
+from typing import Generator, List, Tuple
+from os import listdir, path
+from .file import FILE
+
+
+def search(pathlist: List[str]) -> Generator[FILE, None, None]:
+    """
+    Generate FILE objects from given list of Paths.
+
+    +------------+
+    | Parameters |
+    +------------+
+    | pathlist: List[str]
+    |     List of files and directories.
+
+    +--------+
+    | Yields |
+    +--------+
+    | file: FILE
+    |     file is a FILE object wrapping:
+    |     - a files path string
+    |     - a crc32 sum (None if unknown)
+    """
+    for fpath in pathlist:
+        if path.isfile(fpath):
+            if len(fpath) > 4 and fpath[-4:] == ".sfv":
+                yield from sfv_read(fpath)
+            else:
+                yield FILE(path.realpath(fpath))
+            continue
+        if path.isdir(fpath):
+            yield from search([path.join(fpath, x) for x in listdir(fpath)])
+            continue
+        print(f"[WARN]: No such file or directory: {fpath}")
+
+
+def sfv_read(filename: str) -> Generator[FILE, None, None]:
+    """
+    Read sfv file and yield a FILE (path, expected checksum) per entry.
+    """
+    try:
+        with open(filename, 'r') as file:
+            yield from (FILE(' '.join(x.split()[:-1]), x.split()[-1])
+                        for x in file.read().split('\n')
+                        if x.strip() and x[0] != ';')
+    except UnicodeDecodeError:
+        print(f"[ERR]: {filename} is not a text file.")
+    except FileNotFoundError:
+        print(f"[WARN]: No such file or directory: {filename}")
+
+
+def sfv_write(checked_files: List[Tuple[str, str, bool]], filename: str) -> None:
+    """
+    Write sfv file.
+    """
+    try:
+        with open(filename, 'w') as file:
+            checked_files.sort()
+            if any([not x[2] for x in checked_files]):
+                print(f"[WARN]: {filename} will contain unverified checksums.")
+#            [file.write(f"{str(x[0])}\t{str(x[1])}") for x in checked_files]
+
+            file.write('\n'.join([x for x in ["hello" "world"]]))
+    except FileExistsError:
+        pass
diff --git a/scripts/fck b/scripts/fck
new file mode 100755
index 0000000..815269c
--- /dev/null
+++ b/scripts/fck
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+"""
++---------------------------------+
+| CRC32 sum generator CLI utility |
++---------------------------------+
+"""
+
+import argparse
+from multiprocessing import Pool
+from fck import checker, fileutils
+
+
+def main() -> None:
+    """
+    Main function, should only run when program gets directly invoked.
+    """
+    parser = argparse.ArgumentParser(description="Calculate CRC32 of files")
+    parser.add_argument("-b", "--bigfiles",
+                        help="parse files that exceed your memory limit",
+                        action="store_true")
+    parser.add_argument("-p", "--processes",
+                        help="",
+                        default=2,
+                        type=int)
+    parser.add_argument("-c", "--checksfv",
+                        help="",
+                        default="",
+                        type=str)
+    parser.add_argument("files",
+                        help="files and folders to process",
+                        nargs='*',
+                        default=[])
+    args = parser.parse_args()
+
+    file_list = fileutils.search(args.files)
+    ppool = Pool(processes=args.processes)
+    file_list_checked = ppool.starmap(checker.check, [(x, args.bigfiles)
+                                                      for x in list(file_list)])
+    ppool.close()
+    print(f"[{len([x for x in file_list_checked if x])}/{len(file_list_checked)}] Files passed")
+
+
+if __name__ == '__main__':
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("Exception: KeyboardInterrupt")
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..75a149d
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+from setuptools import setup
+
+setup(
+    name='fck',
+    version='0.1',
+    description='A simple checksum utility that just works.',
+    url='https://git.ophanim.de/derped/fck',
+    python_requires='>=3.8',
+    packages=['fck', 'fck.cktype'],
+    scripts=['scripts/fck'],
+)