"""Score name similarity between pairs read from ``dbo.issuer`` and export a CSV.

Each row of the table is read as a pair of strings, scored with two
fuzzywuzzy metrics (``token_set_ratio`` and ``partial_ratio``), flagged
``YES`` when both scores exceed the threshold, and written to a CSV file.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyodbc
from fuzzywuzzy import fuzz
from pandas.io import sql

# Use the fully-qualified option key: the bare 'max_columns' pattern can match
# more than one option on recent pandas versions and raise OptionError.
pd.set_option('display.max_columns', 50)

# Column layout of the exported result.
cols = ['ISSUER_NAME', 'CPTY_NAME', 'SIMILARITY_1', 'SIMILARITY_2', 'FLAG']

# Both similarity scores must be strictly greater than this to flag a match.
SIMILARITY_THRESHOLD = 50

# NOTE(review): the original script referenced `file_name` without ever
# defining it; this default is a placeholder -- confirm the intended path.
file_name = 'issuer_similarity.csv'

query = 'SELECT * FROM dbo.issuer'


def _score_pair(s1, s2):
    """Return ``(s1, s2, token_set_ratio, partial_ratio, flag)`` for one pair."""
    final = fuzz.token_set_ratio(s1, s2)
    final1 = fuzz.partial_ratio(s1, s2)
    if final > SIMILARITY_THRESHOLD and final1 > SIMILARITY_THRESHOLD:
        flag = 'YES'
    else:
        flag = 'NO'
    return (s1, s2, final, final1, flag)


sql_conn = pyodbc.connect('DRIVER={ODBC Driver 11 for SQL Server}; SERVER=RADO-THINKPAD; DATABASE=IVAN; Trusted_Connection=yes')
try:
    TABLE = pd.read_sql(query, sql_conn)
finally:
    # Release the database connection even if the query fails.
    sql_conn.close()

# The two-name unpacking requires the query to return exactly two columns
# (presumably issuer name and counterparty name -- verify against the table);
# any other column count raises ValueError here.
data = [_score_pair(s1, s2) for s1, s2 in TABLE.itertuples(index=False)]

result = pd.DataFrame(data, columns=cols)
result.to_csv(file_name, sep=',')
# ©