#!/usr/bin/env python3
"""Transcribe voice recordings from the Thought Capture Device (TCD).

Recordings are WAV files named ``R<YYYYMMDD>-<HHMMSS>.WAV`` stored under
``TCD_PATH``.  By default only recordings newer than the one remembered in
``CACHE_FILE`` are transcribed; command-line flags allow listing the
recordings, transcribing one of them, or transcribing all of them.
"""

import argparse
import datetime
import functools
import os
import os.path
import re
import sys

TCD_PATH = "/media/elf/USB DISK/RECORD/"
# Raw string so that "\d" reaches the regex engine untouched; the dot before
# the extension is escaped so that only a literal "." is accepted.
tcd_record = re.compile(r"R(\d{8}-\d{6})\.WAV$", re.IGNORECASE)
CACHE_PATH = os.path.join(os.path.expanduser("~"), ".cache", "tcd")
CACHE_FILE = os.path.join(CACHE_PATH, "lastfile")


def converttcdfilename(filename):
    """Return a human-readable date string for a TCD recording name.

    ``filename`` may be a bare file name or a full path; the timestamp is
    taken from the trailing ``R<YYYYMMDD>-<HHMMSS>.WAV`` component.

    If the name does not match the TCD pattern, a message is printed and the
    process exits with status 1.
    """
    g = tcd_record.search(filename)
    if not g:
        print(
            "Filename {} did not match TCD file pattern. How did we get here?".format(
                filename
            )
        )
        sys.exit(1)
    tcd_time = datetime.datetime.strptime(g[1], "%Y%m%d-%H%M%S")
    return tcd_time.strftime("%A, %B %d %Y, %I:%M %p")


@functools.lru_cache(maxsize=None)
def _load_model():
    """Load the "small" Whisper model onto the GPU, once per process."""
    # Imported here rather than at module level so that --help and --list do
    # not have to import the speech model library at all.
    import whisper

    return whisper.load_model("small").cuda()


def transcribe(filename: str):
    """Transcribe the audio file ``filename`` and return the stripped text.

    The model is asked for English output at temperature 0.0.
    """
    # The model is cached, so transcribing many files loads it only once.
    model = _load_model()
    result = model.transcribe(filename, language="en", temperature=0.0)
    assert result["language"] == "en"
    return result["text"].strip()


def transcribe_with_timestamp(filename: str):
    """Print the recording date, an underline, and the transcription."""
    ourdate = converttcdfilename(filename)
    transcription = transcribe(filename)
    print("{}\n{}\n{}".format(ourdate, "-" * len(ourdate), transcription))


def getlastfile():
    """Return the file name stored in ``CACHE_FILE``, or None if there is none."""
    if not os.path.isfile(CACHE_FILE):
        return None
    with open(CACHE_FILE, "r") as lf:
        last = lf.readline()
    # An empty file or a blank first line both mean "nothing remembered".
    return last.strip() or None


def writelast(filename: str):
    """Record ``filename`` in ``CACHE_FILE``, creating ``CACHE_PATH`` if needed."""
    os.makedirs(CACHE_PATH, exist_ok=True)
    with open(CACHE_FILE, "w") as lf:
        lf.write(filename)
        lf.write("\n")


def find_recordings(path=TCD_PATH):
    """Return the sorted names of the TCD recordings found in ``path``.

    ``os.listdir`` returns entries in arbitrary order; sorting the
    timestamped names keeps --list/--item indices stable between runs and
    makes "everything after the last processed file" well defined.
    """
    return sorted(p for p in os.listdir(path) if tcd_record.match(p))


def pending_files(files, lastfile):
    """Return the entries of ``files`` that come after ``lastfile``.

    All of ``files`` is returned when ``lastfile`` is empty/None or is not
    present in ``files``.
    """
    if not lastfile:
        return list(files)
    try:
        lastindex = files.index(lastfile)
    except ValueError:
        # The remembered file is no longer on the device: start over.
        return list(files)
    return list(files[lastindex + 1 :])


def transcribe_each(files):
    """Transcribe each name in ``files`` (relative to ``TCD_PATH``) in order.

    Entries are separated by blank lines; no separator follows the last one.
    """
    for index, f in enumerate(files):
        if index:
            print("\n\n")
        transcribe_with_timestamp(os.path.join(TCD_PATH, f))


def main(argv=None):
    """Command-line entry point.

    ``argv`` is the argument list to parse; None means ``sys.argv[1:]``.
    Returns normally on success and exits with status 1 on errors.
    """
    parser = argparse.ArgumentParser(
        description="Process Elf's Thought Capture Device"
    )
    parser.add_argument(
        "-l",
        "--list",
        action="store_true",
        help="List all available files on TCD",
    )
    parser.add_argument("-f", "--file", type=str, nargs=1, help="File to parse")
    parser.add_argument(
        "-i",
        "--item",
        nargs=1,
        type=int,
        help="File to parse, by index",
    )
    parser.add_argument(
        "-a",
        "--all",
        action="store_true",
        help="Ignore cached last-read value, process entire TCD.",
    )
    # Parse first so that --help and --file work without the device mounted.
    args = parser.parse_args(argv)

    if args.file:
        # An explicit path need not follow the TCD naming scheme, so only the
        # text is printed, without a date header.
        print(transcribe(args.file[0]))
        return

    if not os.path.isdir(TCD_PATH):
        print("Could not identify path to TCD repository. Is it mounted?")
        sys.exit(1)

    files = find_recordings()
    if not files:
        print("No files in TCD repository to process.")
        return

    if args.list:
        for count, f in enumerate(files, start=1):
            print("{}: {}".format(count, converttcdfilename(f)))
        return

    if args.item is not None:
        pos = args.item[0]
        if pos < 1 or pos > len(files):
            print(
                "There is no item at the index you provided. Legal range is 1 to {}".format(
                    len(files)
                )
            )
            sys.exit(1)
        # --list numbers the files from 1, so convert to a 0-based index.
        transcribe_with_timestamp(os.path.join(TCD_PATH, files[pos - 1]))
        return

    if args.all:
        transcribe_each(files)
        return

    files_to_transcribe = pending_files(files, getlastfile())
    if not files_to_transcribe:
        print("NOTICE: No new entries found to transcribe.")
        return

    transcribe_each(files_to_transcribe)
    writelast(files_to_transcribe[-1])


if __name__ == "__main__":
    main()