Hello,
I am a brand new Python user. I used ChatGPT to develop the code below. However, I am getting the following error SyntaxError: multiple statements found while compiling a single statement. I have tried to edit but I keep coming back to the error.
Any ideas or guidance on how to correct this are appreciated
Here is the code:
import pandas as pd
from rapidfuzz import process, fuzz
‘Single Line’
# — CONFIGURATION —
bank_statement_file = ‘bank_statement.csv’
book_ledger_file = ‘book_ledger.csv’
output_reconciliation_file = ‘bank_reconciliation_final.xlsx’
fuzzy_threshold = 85 # similarity score threshold
# — LOAD DATA —
bank_df = pd.read_csv(bank_statement_file)
book_df = pd.read_csv(book_ledger_file)
# — CLEAN DATA —
def clean_data(df):
df\['Date'\] = pd.to_datetime(df\['Date'\])
df\['Description'\] = df\['Description'\].astype(str).str.strip().str.lower()
df\['Amount'\] = df\['Amount'\].astype(float)
return df
bank_df = clean_data(bank_df)
book_df = clean_data(book_df)
# — EXACT MATCHES —
bank_df[‘Match_Key’] = bank_df[‘Date’].astype(str) + ‘|’ + bank_df[‘Amount’].astype(str)
book_df[‘Match_Key’] = book_df[‘Date’].astype(str) + ‘|’ + book_df[‘Amount’].astype(str)
exact_matched = pd.merge(
bank_df, book_df,
on='Match_Key',
how='inner',
suffixes=('\_bank', '\_book')
)
exact_matched = exact_matched[exact_matched[‘Description_bank’] == exact_matched[‘Description_book’]]
unmatched_bank = bank_df[~bank_df[‘Match_Key’].isin(exact_matched[‘Match_Key’])]
unmatched_book = book_df[~book_df[‘Match_Key’].isin(exact_matched[‘Match_Key’])]
# — FUZZY MATCHES —
fuzzy_matches = []
for idx_b, bank_row in unmatched_bank.iterrows():
candidates = unmatched_book\[unmatched_book\['Amount'\] == bank_row\['Amount'\]\]
if candidates.empty:
continue
match_desc, score, match_idx = process.extractOne(
bank_row\['Description'\],
candidates\['Description'\],
scorer=fuzz.token_sort_ratio
)
if score >= fuzzy_threshold:
matched_book_row = candidates.loc\[match_idx\]
fuzzy_matches.append({
'Date_bank': bank_row\['Date'\],
'Description_bank': bank_row\['Description'\],
'Amount_bank': bank_row\['Amount'\],
'Date_book': matched_book_row\['Date'\],
'Description_book': matched_book_row\['Description'\],
'Amount_book': matched_book_row\['Amount'\],
'Match_Score': score
})
fuzzy_matched_df = pd.DataFrame(fuzzy_matches)
# Remove fuzzy-matched rows from unmatched sets
if not fuzzy_matched_df.empty:
matched_bank_keys = set(zip(fuzzy_matched_df\['Date_bank'\], fuzzy_matched_df\['Amount_bank'\], fuzzy_matched_df\['Description_bank'\]))
matched_book_keys = set(zip(fuzzy_matched_df\['Date_book'\], fuzzy_matched_df\['Amount_book'\], fuzzy_matched_df\['Description_book'\]))
unmatched_bank = unmatched_bank\[\~unmatched_bank.apply(lambda x: (x\['Date'\], x\['Amount'\], x\['Description'\]) in matched_bank_keys, axis=1)\]
unmatched_book = unmatched_book\[\~unmatched_book.apply(lambda x: (x\['Date'\], x\['Amount'\], x\['Description'\]) in matched_book_keys, axis=1)\]
# — CATEGORIZE UNMATCHED ITEMS —
def categorize_unmatched(bank_df, book_df):
categories = \[\]
for \_, row in book_df.iterrows():
if row\['Amount'\] < 0:
categories.append('Outstanding Check')
else:
categories.append('Deposit in Transit')
book_df = book_df.copy()
book_df\['Category'\] = categories
categories = \[\]
for \_, row in bank_df.iterrows():
if row\['Amount'\] < 0:
categories.append('Unrecorded Bank Withdrawal')
else:
categories.append('Unrecorded Bank Deposit')
bank_df = bank_df.copy()
bank_df\['Category'\] = categories
return bank_df, book_df
unmatched_bank, unmatched_book = categorize_unmatched(unmatched_bank, unmatched_book)
# — FLAG POTENTIAL DUPLICATES —
def flag_duplicates(df, label):
dup = df\[df.duplicated(subset=\['Date', 'Amount'\], keep=False)\]
dup = dup.assign(Potential_Duplicate_Label=label)
return dup
duplicates_bank = flag_duplicates(bank_df, ‘Bank’)
duplicates_book = flag_duplicates(book_df, ‘Book’)
# — SUMMARY —
summary = {
'Total Bank Transactions': \[len(bank_df)\],
'Total Book Transactions': \[len(book_df)\],
'Exact Matches': \[len(exact_matched)\],
'Fuzzy Matches': \[len(fuzzy_matched_df)\],
'Unmatched in Bank': \[len(unmatched_bank)\],
'Unmatched in Book': \[len(unmatched_book)\],
'Bank Duplicates': \[len(duplicates_bank)\],
'Book Duplicates': \[len(duplicates_book)\],
'Difference in Total Amount': \[round(bank_df\['Amount'\].sum() - book_df\['Amount'\].sum(), 2)\]
}
summary_df = pd.DataFrame(summary)
# — EXPORT RESULTS —
with pd.ExcelWriter(output_reconciliation_file) as writer:
summary_df.to_excel(writer, sheet_name='Summary', index=False)
exact_matched.to_excel(writer, sheet_name='Exact_Matched', index=False)
fuzzy_matched_df.to_excel(writer, sheet_name='Fuzzy_Matched', index=False)
unmatched_bank.to_excel(writer, sheet_name='Unmatched_Bank', index=False)
unmatched_book.to_excel(writer, sheet_name='Unmatched_Book', index=False)
duplicates_bank.to_excel(writer, sheet_name='Potential_Duplicates_Bank', index=False)
duplicates_book.to_excel(writer, sheet_name='Potential_Duplicates_Book', index=False)
print(“Bank recon done”)
print(f"Reconciliation report saved to: {output_reconciliation_file}")
