Source code for scrawler.utils.general_utils

"""General purpose utility functions."""
import datetime
import functools
import re
import time


def sanitize_text(text: str, lower: bool = False) -> str:
    """Sanitize texts by removing unnecessary or unwanted characters.

    Newlines, tabs and carriage returns are each replaced by a single space,
    HTML comments (which can contain JavaScript code) are removed, and
    surrounding whitespace is stripped.

    :param text: The text to sanitize.
    :param lower: If ``True``, the result is converted to lowercase.
    :return: The sanitized text.
    """
    # One pass instead of three chained ``replace`` calls.
    text = text.translate(str.maketrans("\n\t\r", "   "))

    # ``.*?`` (rather than ``.+?``) also matches the empty comment ``<!---->``
    # and stops at the first ``-->`` instead of running on into later content.
    text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)

    # Strip only after the comments are gone, so whitespace that surrounded a
    # leading or trailing comment does not survive in the result.
    text = text.strip()

    return text.lower() if lower else text
def timing_decorator(func):
    """A function decorator to measure function runtime and print the runtime on the console.

    The runtime is printed only when the wrapped function returns normally;
    if it raises, the exception propagates and nothing is printed.

    :param func: The function to be timed.
    :return: A wrapper that calls ``func`` with the same arguments, prints the
        elapsed time and returns the result of ``func``.
    """
    @functools.wraps(func)
    def timed(*args, **kw):
        # perf_counter is monotonic, so the measurement is not distorted by
        # wall-clock changes (DST shifts, NTP adjustments) during the call.
        start = time.perf_counter()
        result = func(*args, **kw)
        elapsed = datetime.timedelta(seconds=time.perf_counter() - start)

        # timedelta keeps the "H:MM:SS.ffffff" output format.
        print(f"\nRuntime of method {func.__name__}: {elapsed}")
        return result

    return timed
class ProgressBar:
    """Progress bar that is rendered on a single line of the command line interface."""

    def __init__(self, total_length: int = 0, progress: int = 0, custom_message: str = "",
                 width_in_command_line: int = 100, progress_char: str = "█", remaining_char: str = "-"):
        """Print a progress bar in the command line interface.

        Default looks like this: ``Custom Message |██████████----------| 50.0% (5 / 10)``.

        :param total_length: Absolute length of concept (e.g. total download size = 20,000 bytes).
        :param progress: Share of ``total_length`` already reached (e.g. 10,000 bytes already downloaded).
        :param custom_message: String to appear to the left of the progress bar.
        :param width_in_command_line: Number of characters used in print to display the progress bar.
        :param progress_char: Character to use for filling the progress bar.
        :param remaining_char: Character to use for the space not yet filled by progress.
        """
        self.total_length = total_length
        self.progress = progress
        self.custom_msg = custom_message
        self.width_in_command_line = width_in_command_line
        self.progress_char = progress_char
        self.remaining_char = remaining_char

    def update(self, iterations: int = 1, total_length_update: int = 0):
        """Update internal progress parameters and re-print the bar.

        :param iterations: Used to update :attr:`progress`.
        :param total_length_update: Used to update :attr:`total_length`.
        """
        self.progress += iterations
        self.total_length += total_length_update
        self.print()

    def print(self):
        """Print current progress on the command line.

        The line starts with a carriage return and has no trailing newline, so
        successive calls overwrite each other on the same terminal line.
        """
        try:
            percentage = self.progress / self.total_length
        except ZeroDivisionError:
            # Nothing to measure against yet: show an empty bar.
            percentage = 0

        # Clamp only the *drawn* share to [0, 1] so the bar never grows beyond
        # ``width_in_command_line`` when progress overshoots ``total_length``
        # or a value is negative. The numeric readout keeps the real value.
        drawn_share = min(max(percentage, 0), 1)
        no_progress_characters = int(drawn_share * self.width_in_command_line)
        no_remaining_characters = self.width_in_command_line - no_progress_characters

        progress_bar = self.progress_char * no_progress_characters + self.remaining_char * no_remaining_characters
        progress_in_numbers = f"{round(percentage * 100, 2)}% ({self.progress} / {self.total_length})"  # e.g. "99.0% (99 / 100)"

        # Without a newline, line-buffered terminals would hold the output
        # back; flush so that the bar is actually visible after each update.
        print(f"\r{self.custom_msg} |{progress_bar}| {progress_in_numbers}", end="", flush=True)