Init: Basic project structure and functionality.

This commit is contained in:
Kevin Baensch 2020-03-21 08:20:49 +01:00
commit 8715c69e89
Signed by: derped
GPG key ID: C0F1D326C7626543
13 changed files with 503 additions and 0 deletions

4
.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
*.pyc
.mypy_cache
__pycache__
result

20
LICENSE Normal file
View file

@ -0,0 +1,20 @@
Copyright (c) 2019-2020 Kevin Baensch
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

47
README.md Normal file
View file

@ -0,0 +1,47 @@
# fck - A simple checksum utility that just works
File checker (fck) is a python wrapper for various checksum functions (though right now it only supports CRC32).
Its goal is to make verifying large numbers of files both easier and faster.
## Features
- Process/check multiple files at once.
- Automatically find expected checksums
- Quickly find faulty files.
- (Not yet) easily extendable
## Syntax
```
fck --help
usage: fck [-h] [-b] [-p PROCESSES] [-c CHECKSFV] [files [files ...]]
Calculate CRC32 of files
positional arguments:
files files and folders to process
optional arguments:
-h, --help show this help message and exit
-b, --bigfiles parse files that exceed your memory limit
-p PROCESSES, --processes PROCESSES
-c CHECKSFV, --checksfv CHECKSFV
```
## Dependencies
- Python >= 3.8
- zlib // used to calculate CRC32 sums
## Roadmap
### 0.1
- [ ] Context-based selection of the appropriate checksum type.
- [ ] Define and document project/class structure.
- [ ] Support reading/writing '.sfv' files.
- [ ] Add Tests.
### 0.2
- [ ] Dehardcode various CRC32 specific functionalities.
- [ ] Implement proper logging.
- [ ] Implement other common checksum types.
- [ ] Write a comprehensive documentation.
### Future
- [ ] More and better error (and memory) handling.
- [ ] Better logging/output control.
- [ ] Add an optional GUI.
- [ ] Package application for Windows/Mac/Linux (maybe)

19
default.nix Normal file
View file

@ -0,0 +1,19 @@
# Nix expression that packages fck as a Python application.
# Expects buildPythonApplication, zlib and lib to be supplied by the caller.
{ buildPythonApplication, zlib, lib }:
# Bring lib's attributes (cleanSource, licenses, platforms) into scope.
with lib;
buildPythonApplication {
name = "fck";
version = "0.1";
# Build from this directory, passed through lib.cleanSource.
src = cleanSource ./.;
# NOTE(review): zlib is listed for the CRC32 dependency named in the README;
# presumably Python's bundled zlib module already covers this - confirm.
buildInputs = [ zlib ];
meta = {
homepage = "https://git.ophanim.de/derped/fck";
description = "A small checksum utility that just works.";
license = licenses.mit;
platforms = platforms.all;
};
}

192
deprecated/crc32sum_threaded.py Executable file
View file

@ -0,0 +1,192 @@
#!/usr/bin/env python3
# This software is licensed under the MIT License:
# Copyright (c) 2019-2020 Kevin Baensch
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# DISCLAIMER
# This file will not receive any updates.
# This is my initial implementation of a crc32 checker.
# The code quality is kind of mediocre (as it was only intended for personal use)
# Still works perfectly fine and has the advantage of being compact (and not as bloated as the OO implementation)
# Cool things to come... eventually... maybe (when I'm motivated):
# - add support for sfv files
# - write checksums to files
# - better error handling (symlinks etc)
# - should under no circumstances crash (check for available memory)
# - lots and lots of cleanup
# - documentation
# - verify sfv checksums: re.fullmatch(r"[0-9a-fA-F]{8}", s or "") is not None
# - contextmanagers are a cool way to handle exceptions https://www.python.org/dev/peps/pep-0343/
# - if possible move file handling into separate function (or if really necessary a class)
# - unit tests
# - maybe use a logging library
# - turn this into a proper library -> move stuff into separate files
"""
+---------------------+
| CRC32 sum generator |
+---------------------+
"""
import argparse
from multiprocessing import Pool
from os import listdir, path
from typing import Generator, List, Tuple, Optional
from zlib import crc32
def file_search(pathlist: List[str]) -> Generator[Tuple[(Optional[str], str)], None, None]:
    """
    Walk the given paths and yield every file found as a check candidate.

    +------------+
    | Parameters |
    +------------+
    | pathlist: List[str]
    |     Files and directories; directories are descended into recursively.

    +--------+
    | Yields |
    +--------+
    | file: Tuple[(Optional[str], str)]
    |     - the expected crc32 sum (None unless the entry came from an sfv file)
    |     - the file's path string
    """
    for entry in pathlist:
        if path.isfile(entry):
            # '.sfv' files are expanded into the entries they list instead of
            # being checksummed themselves.
            if len(entry) > 4 and entry.endswith(".sfv"):
                yield from sfv_read(entry)
            else:
                yield (None, entry)
        elif path.isdir(entry):
            children = [path.join(entry, child) for child in listdir(entry)]
            yield from file_search(children)
        else:
            print(f"[WARN]: No such file or directory: {entry}")
def sfv_read(filename: str) -> Generator[Tuple[(Optional[str], str)], None, None]:
    """
    Read an sfv file and yield its entries.

    Every line that is neither blank nor a comment (first character ';') is
    expected to look like ``<file path> <checksum>``. The checksum is the last
    whitespace separated field; everything before it is the file path, with
    inner whitespace kept as written.

    +--------+
    | Yields |
    +--------+
    | file: Tuple[(Optional[str], str)]
    |     - the checksum string listed in the sfv file
    |     - the file path listed in the sfv file
    """
    try:
        with open(filename, 'r') as file:
            lines = file.read().split('\n')
    except UnicodeDecodeError:
        print(f"[ERR]: {filename} is not a text file.")
        return
    except FileNotFoundError:
        print(f"[WARN]: No such file or directory: {filename}")
        return
    for line in lines:
        if line.startswith(';'):
            continue
        entry = line.strip()
        if not entry:
            # Blank and whitespace-only lines carry no entry.
            continue
        # Split once from the right so that repeated spaces inside the file
        # name are not collapsed.
        fields = entry.rsplit(None, 1)
        if len(fields) != 2:
            print(f"[WARN]: Skipping malformed line in {filename}: {entry}")
            continue
        fpath, cksum = fields
        yield (cksum, fpath)
# NOTE(review): nothing in this file calls sfv_write. The tuple layout
# (checksum, path, verified) is assumed from what sfv_read/file_check produce.
def sfv_write(checked_files: List[Tuple[str, str, bool]], filename: str) -> None:
    """
    Write sfv file.

    +------------+
    | Parameters |
    +------------+
    | checked_files: List[Tuple[str, str, bool]]
    |     Entries of (checksum, file path, verified); the list is not modified.
    | filename: str
    |     Path of the sfv file to create or overwrite.

    One ``<file path> <checksum>`` line is written per entry, in sorted order.
    A warning is printed when any entry is not verified.
    """
    # sorted() keeps the caller's list untouched.
    entries = sorted(checked_files)
    if not all(verified for _, _, verified in entries):
        print(f"[WARN]: {filename} will contain unverified checksums.")
    with open(filename, 'w') as file:
        file.writelines(f"{fpath} {cksum}\n" for cksum, fpath, _ in entries)
def checkmark(check: Optional[bool] = None) -> str:
    """
    Takes optional bool and returns colored string.

    True gives a green check mark, False a red cross and None a grey
    question mark, each followed by the ANSI reset sequence.
    """
    return {
        True: '\033[92m✓\033[0m',
        False: '\033[91m❌\033[0m',
        # The colour sequence needs its terminating 'm'; without it the
        # escape sequence is malformed.
        None: '\033[90m?\033[0m'
    }[check]
def file_check(filename: Tuple[Optional[str], str],
               largefile: bool = False) -> Tuple[Tuple[str, str], bool]:
    """
    Check given file and return checked file.

    +------------+
    | Parameters |
    +------------+
    | filename: Tuple[Optional[str], str]
    |     (expected crc32 sum or None, file path)
    | largefile: bool
    |     Read the file in fixed-size chunks instead of all at once.

    +---------+
    | Returns |
    +---------+
    | ((cksum, file path), check)
    |     cksum is the crc32 sum as 8 upper case hex digits. check is True
    |     when cksum equals the expected sum or, if no expected sum is given,
    |     when cksum occurs in the file path. A missing file never passes.
    """
    expected, fpath = filename
    crc = 0
    found = True
    try:
        with open(fpath, 'rb') as file:
            if not largefile:
                crc = crc32(file.read())
            else:
                # Fixed-size chunks bound memory use; iterating binary "lines"
                # could return the whole file when it has no newline bytes.
                for chunk in iter(lambda: file.read(1 << 20), b""):
                    crc = crc32(chunk, crc)
    except FileNotFoundError:
        print(f"[WARN]: No such file or directory: {fpath}")
        found = False
    cksum = f"{crc:08X}"
    if not found:
        # Never let the all-zero sum of an unread file count as a match.
        check = False
    elif expected is None:
        check = cksum in fpath.upper()
    else:
        check = cksum == expected.upper()
    print(f"{checkmark(check)} \t {cksum} \t {fpath}")
    return ((cksum, fpath), check)
def main() -> None:
    """
    Main function, should only run when the program gets directly invoked.

    Parses the command line, collects all files below the given paths and
    checks them in a pool of worker processes, then prints how many passed.
    """
    parser = argparse.ArgumentParser(description="Calculate CRC32 of files")
    parser.add_argument("-b", "--bigfiles",
                        help="parse files that exceed your memory limit",
                        action="store_true")
    parser.add_argument("-p", "--processes",
                        help="",
                        default=2,
                        type=int)
    parser.add_argument("-c", "--checksfv",
                        help="",
                        default="",
                        type=str)
    parser.add_argument("files",
                        help="files and folders to process",
                        nargs='*',
                        default=[])
    args = parser.parse_args()
    jobs = [(entry, args.bigfiles) for entry in file_search(args.files)]
    # The context manager releases the worker processes even if a check raises.
    with Pool(processes=args.processes) as ppool:
        file_list_checked = ppool.starmap(file_check, jobs)
    passed = sum(1 for _, ok in file_list_checked if ok)
    print(f"[{passed}/{len(file_list_checked)}] Files passed")


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Report Ctrl-C with a short message instead of a traceback.
        print("Exception: KeyboardInterrupt")

0
fck/__init__.py Normal file
View file

32
fck/checker.py Normal file
View file

@ -0,0 +1,32 @@
from typing import Generator, List, Tuple, Optional
from .file import FILE
import re
def checkmark(value: Optional[bool] = None) -> str:
    """
    Map a verification result onto a coloured status symbol.

    True gives a green check mark, False a red cross and None (no expected
    checksum known) a yellow question mark.
    """
    symbols = {
        None: '\033[33m?\033[0m',
        False: '\033[91m❌\033[0m',
        True: '\033[92m✓\033[0m',
    }
    return symbols[value]
def check(f: FILE, largefile: bool = False) -> Optional[bool]:
    """
    Compute the checksum of the given file and verify it.

    The file's checksum object is reset, fed with the file contents and then
    compared against the expected checksum via f.verify(). One result row is
    printed per file.

    Returns True on a match, False on a mismatch or when the file does not
    exist, and None when no expected checksum is known.
    """
    f.csum.reset()
    readable = True
    try:
        with open(f.fpath, 'rb') as file:
            if not largefile:
                f.csum.gensum(file.read())
            else:
                # Fixed-size chunks bound memory use; iterating binary "lines"
                # could return the whole file when it has no newline bytes.
                while chunk := file.read(1 << 20):
                    f.csum.gensum(chunk)
    except FileNotFoundError:
        print(f"[WARN]: No such file or directory: {f.fpath}")
        readable = False
    # A file that could not be read must not pass on its freshly reset sum.
    result = f.verify() if readable else False
    print(f"{checkmark(result)} \t {f.csum} \t {f.esum} \t {f.fname}")
    return result

18
fck/cktype/__init__.py Normal file
View file

@ -0,0 +1,18 @@
from re import search
from typing import Optional
from .crc32 import CRC32
# Supported checksum implementations; the first entry is the default type.
CKTYPES = [
    CRC32,
]
def resolve(fname: str, esum: Optional[str] = None, cstype: Optional[str] = None):
    """
    Pick a checksum type for fname and determine the expected checksum.

    Parameters:
    - fname: file name that may contain a checksum.
    - esum: expected checksum; if None it is looked up in fname.
    - cstype: optional checksum type name (compared case-insensitively with
      the NAME of every entry in CKTYPES) that overrides pattern detection.
      An unknown name is ignored.

    Without a cstype override the first type in CKTYPES whose REGEX matches
    fname is used. If neither is applicable the first checksum type in
    CKTYPES is returned.

    Returns a tuple of (new checksum object, expected checksum or None).
    """
    chosen = None
    if cstype is not None:
        wanted = cstype.upper()
        chosen = next((ck for ck in CKTYPES if ck.NAME.upper() == wanted), None)
    if esum is None:
        # With an explicit type only that type's pattern is tried.
        candidates = CKTYPES if chosen is None else [chosen]
        for candidate in candidates:
            if match := candidate.REGEX.search(fname):
                chosen, esum = candidate, match.group(0)
                break
    if chosen is None:
        chosen = CKTYPES[0]
    return chosen(), esum

29
fck/cktype/crc32.py Normal file
View file

@ -0,0 +1,29 @@
from typing import Generator, List, Tuple, Optional
from re import compile, Pattern
from zlib import crc32
class CRC32:
    """
    Incremental CRC32 checksum.

    Data is fed in with gensum(); repr() gives the current sum as 8 upper
    case hex digits.
    """
    NAME: str = "CRC32"
    # File extensions of checksum list files for this type.
    EXT: List[str] = [".sfv"]
    # Line patterns of a checksum list file: blank or ';' comment lines, and
    # "<name> <8 hex digits>" entries.
    SYNTAX: List[Pattern] = [
        compile(r'^(;.*)?$'),
        compile(r'^.* [0-9a-fA-F]{8}$')
    ]
    # Pattern used to find a checksum inside a file name.
    REGEX: Pattern = compile(r'[0-9a-fA-F]{8}')
    # Instances are mutable and define __eq__, so they are not hashable.
    __hash__ = None

    def __init__(self) -> None:
        self.cksum: int = 0

    def gensum(self, data: bytes) -> None:
        """Fold data into the running checksum."""
        self.cksum = crc32(data, self.cksum)

    def reset(self) -> None:
        """Start over with an empty checksum."""
        self.cksum = 0

    def __repr__(self) -> str:
        return f"{self.cksum:08X}"

    def __eq__(self, other) -> bool:
        """Compare with another CRC32 or with a hex string (any case)."""
        if isinstance(other, CRC32):
            return self.cksum == other.cksum
        if isinstance(other, str):
            return repr(self) == other.upper()
        return NotImplemented

18
fck/file.py Normal file
View file

@ -0,0 +1,18 @@
from typing import Optional
from os import path
from . import cktype
class FILE:
    """
    A file to check, together with its computed and expected checksum.

    Attributes set by __init__:
    - fpath: path as given by the caller.
    - fname: base name of fpath.
    - csum: checksum object returned by cktype.resolve().
    - esum: expected checksum string, or None when unknown.
    """

    def __init__(self, fpath: str, esum: Optional[str] = None):
        self.fpath = fpath
        self.fname = path.basename(fpath)
        self.csum, self.esum = cktype.resolve(self.fname, esum)

    def __repr__(self) -> str:
        return self.fname

    def verify(self) -> Optional[bool]:
        """
        Compare the computed checksum with the expected one.

        Returns None when no expected checksum is known, otherwise whether
        both match. The comparison ignores case because expected sums taken
        from file names or sfv files may be written in lower case.
        """
        if self.esum is None:
            return None
        return repr(self.csum).upper() == self.esum.upper()

65
fck/fileutils.py Normal file
View file

@ -0,0 +1,65 @@
from typing import Generator, List, Tuple
from os import listdir, path
from .file import FILE
def search(pathlist: List[str]) -> Generator[FILE, None, None]:
"""
Generate file paths from given list of Paths.
+------------+
| Parameters |
+------------+
| pathlist: List[str]
| List of files and directories.
+--------+
| Yields |
---------+
| file: Tuple[(None, str)]
| file is a Tuple containing:
| - a files path string
| - a crc32 sum (None if unknown)
"""
for fpath in pathlist:
if path.isfile(fpath):
if len(fpath) > 4 and fpath[-4:] == ".sfv":
yield from sfv_read(fpath)
else:
yield FILE(path.realpath(fpath))
continue
if path.isdir(fpath):
yield from search([path.join(fpath, x) for x in listdir(fpath)])
continue
print(f"[WARN]: No such file or directory: {fpath}")
def sfv_read(filename: str) -> Generator[FILE, None, None]:
"""
Read sfv file.
"""
try:
with open(filename, 'r') as file:
yield from (FILE(' '.join(x.split()[:-1]), x.split()[-1])
for x in file.read().split('\n')
if len(x) != 0 and x[0] != ';')
except UnicodeDecodeError:
print(f"[ERR]: {filename} is not a text file.")
except FileNotFoundError:
print(f"[WARN]: No such file or directory: {filename}")
def sfv_write(checked_files: List[Tuple[str, str, bool]], filename: str) -> None:
    """
    Write sfv file.

    +------------+
    | Parameters |
    +------------+
    | checked_files: List[Tuple[str, str, bool]]
    |     Entries of (checksum, file path, verified); the list is not modified.
    |     NOTE(review): the field order is assumed from the type hint - confirm
    |     once a caller exists.
    | filename: str
    |     Path of the sfv file to create or overwrite.

    One ``<file path> <checksum>`` line is written per entry, in sorted order.
    A warning is printed when any entry is not verified.
    """
    # sorted() keeps the caller's list untouched.
    entries = sorted(checked_files)
    if not all(verified for _, _, verified in entries):
        print(f"[WARN]: {filename} will contain unverified checksums.")
    with open(filename, 'w') as file:
        file.writelines(f"{fpath} {cksum}\n" for cksum, fpath, _ in entries)

47
scripts/fck Executable file
View file

@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""
+---------------------------------+
| CRC32 sum generator CLI utility |
+---------------------------------+
"""
import argparse
from multiprocessing import Pool
from fck import checker, fileutils
def main() -> None:
    """
    Main function, should only run when the program gets directly invoked.

    Parses the command line, collects all files below the given paths via
    fileutils.search and checks them in a pool of worker processes, then
    prints how many passed.
    """
    parser = argparse.ArgumentParser(description="Calculate CRC32 of files")
    parser.add_argument("-b", "--bigfiles",
                        help="parse files that exceed your memory limit",
                        action="store_true")
    parser.add_argument("-p", "--processes",
                        help="",
                        default=2,
                        type=int)
    parser.add_argument("-c", "--checksfv",
                        help="",
                        default="",
                        type=str)
    parser.add_argument("files",
                        help="files and folders to process",
                        nargs='*',
                        default=[])
    args = parser.parse_args()
    jobs = [(entry, args.bigfiles) for entry in fileutils.search(args.files)]
    # The context manager releases the worker processes even if a check raises.
    with Pool(processes=args.processes) as ppool:
        file_list_checked = ppool.starmap(checker.check, jobs)
    # Only True counts as passed; False and None (unknown) do not.
    passed = sum(1 for result in file_list_checked if result)
    print(f"[{passed}/{len(file_list_checked)}] Files passed")


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Report Ctrl-C with a short message instead of a traceback.
        print("Exception: KeyboardInterrupt")

12
setup.py Normal file
View file

@ -0,0 +1,12 @@
#!/usr/bin/env python3
from setuptools import setup
setup(
    name='fck',
    version='0.1',
    description='A simple checksum utility that just works.',
    url='https://git.ophanim.de/derped/fck',
    # Python 3.8 itself is supported (the README lists ">= Python3.8"), so
    # the bound has to be inclusive.
    python_requires='>=3.8',
    # Packages are named by dotted module path, not by directory path.
    packages=['fck', 'fck.cktype'],
    scripts=['scripts/fck'],
)