I am trying to convert a legacy MS-DOS Hebrew print file (plain text, encoded in code page 862) to PDF with the following script:
import sys
import re
from fpdf import FPDF
def convert_dos_hebrew_to_utf8(input_file, output_file):
    """Re-encode a CP862 text file as UTF-8.

    Args:
        input_file: path of the source file, decoded as code page 862.
        output_file: path of the file to create (or overwrite) with the
            same text encoded as UTF-8.
    """
    # Read everything first so the source is closed before the target opens.
    with open(input_file, 'r', encoding='cp862') as dos_source:
        decoded = dos_source.read()

    with open(output_file, 'w', encoding='utf-8') as utf8_target:
        utf8_target.write(decoded)
class PDF(FPDF):
    """FPDF document with a fixed page header and title/body helpers.

    The core fonts (including 'Arial') only cover Latin-1, so writing Hebrew
    text with them fails with an encoding error.  To make Hebrew printable,
    the constructor registers a Unicode TrueType font and uses it for all
    text.  When no such font is available it falls back to the core 'Arial'
    font, which is what this class always used before.

    Args:
        *args, **kwargs: forwarded unchanged to ``FPDF.__init__``.
        unicode_font: optional path to a TrueType font that contains Hebrew
            glyphs.  When omitted, ``HEBREW_FONT_CANDIDATES`` is searched.
        unicode_font_bold: optional path to the bold variant.  When omitted
            (and none is discovered) the regular file is reused for bold.
    """

    # Family name for the registered TrueType font.  Deliberately not
    # 'Arial': FPDF can treat 'Arial' as an alias of the core Helvetica font.
    HEBREW_FONT_FAMILY = 'HebrewTTF'

    # (regular, bold) font files that ship with common systems and include
    # Hebrew glyphs.  The first pair whose regular file exists is used.
    HEBREW_FONT_CANDIDATES = (
        (r'C:\Windows\Fonts\arial.ttf', r'C:\Windows\Fonts\arialbd.ttf'),
        ('/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf',
         '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf'),
        ('/System/Library/Fonts/Supplemental/Arial.ttf',
         '/System/Library/Fonts/Supplemental/Arial Bold.ttf'),
    )

    def __init__(self, *args, unicode_font=None, unicode_font_bold=None,
                 **kwargs):
        # Imported locally so this class does not depend on the module's
        # top-level import list.
        import os

        super().__init__(*args, **kwargs)
        # Font family used by header(), chapter_title() and chapter_body().
        self._text_family = 'Arial'

        if unicode_font is None:
            for regular, bold in self.HEBREW_FONT_CANDIDATES:
                if os.path.isfile(regular):
                    unicode_font = regular
                    if unicode_font_bold is None and os.path.isfile(bold):
                        unicode_font_bold = bold
                    break

        if unicode_font is None:
            # No Unicode font found: keep the original core-font behaviour.
            return

        # uni=True is required by the legacy PyFPDF package to load a TTF as
        # a Unicode font; fpdf2 still accepts it (with a deprecation warning).
        self.add_font(self.HEBREW_FONT_FAMILY, '', unicode_font, uni=True)
        # The 'B' style must be registered too, because header() and
        # chapter_title() request it.
        self.add_font(self.HEBREW_FONT_FAMILY, 'B',
                      unicode_font_bold or unicode_font, uni=True)
        self._text_family = self.HEBREW_FONT_FAMILY

    def header(self):
        """Draw the centred 'Hebrew Text' banner at the top of each page."""
        self.set_font(self._text_family, 'B', 12)
        self.cell(0, 10, 'Hebrew Text', 0, 1, 'C')

    def chapter_title(self, title):
        """Write *title* as a bold, left-aligned line followed by a gap."""
        self.set_font(self._text_family, 'B', 12)
        self.cell(0, 10, title, 0, 1, 'L')
        self.ln(10)

    def chapter_body(self, body):
        """Write *body* as wrapped regular-weight text, then a line break."""
        self.set_font(self._text_family, '', 12)
        self.multi_cell(0, 10, body)
        self.ln()
def create_pdf(input_file, output_file):
    """Render a UTF-8 text file into a single-chapter PDF.

    Args:
        input_file: path of the UTF-8 text file to read.
        output_file: path the finished PDF is written to.
    """
    document = PDF()
    document.add_page()
    document.set_auto_page_break(auto=True, margin=15)

    with open(input_file, 'r', encoding='utf-8') as source:
        contents = source.read()

    document.chapter_title('Hebrew Text')
    # The body goes through reverse_hebrew() before it is laid out.
    document.chapter_body(reverse_hebrew(contents))
    document.output(output_file)
def reverse_hebrew(text):
    """Reverse the character order of every Hebrew run in *text*.

    A "run" is one or more Hebrew words (characters in U+0590-U+05FF)
    separated by single spaces.  Each run is reversed as a whole, so both
    the letters inside each word and the order of the words are flipped.
    Everything outside a run (Latin text, digits, punctuation, newlines,
    wider gaps of spaces) is left exactly where it was.

    The previous pattern tried a "single letter, then space + single letter"
    alternative first.  Because that alternative succeeds on any one Hebrew
    letter, whole words were never matched: ordinary words came back
    unreversed and the two letters around a space were swapped across the
    word boundary.

    Args:
        text: the string to process; may be empty.

    Returns:
        A new string with each Hebrew run reversed.
    """
    # Words must be matched greedily with '+', otherwise each letter becomes
    # its own match and reversing it is a no-op.
    hebrew_run_re = re.compile(r'[\u0590-\u05FF]+(?: [\u0590-\u05FF]+)*')
    return hebrew_run_re.sub(lambda match: match.group()[::-1], text)
# Script entry point: takes the CP862 input file and the PDF output path.
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python script.py <input_file> <output_file>")
        sys.exit(1)

    input_file, output_file = sys.argv[1:]

    # Step 1: re-encode the DOS Hebrew text as UTF-8 in an intermediate file.
    utf8_copy = 'output_utf8.txt'
    convert_dos_hebrew_to_utf8(input_file, utf8_copy)

    # Step 2: build the PDF from that intermediate UTF-8 file.
    create_pdf(utf8_copy, output_file)
In VS Code the script shows no errors, but when I run it in the terminal:
python script.py input.prt output.pdf
I receive the error: ModuleNotFoundError: No module named 'fpdf'
Any help resolving this issue would be much appreciated.