# NOTE: removed a stray "." here that made the module fail to import (SyntaxError).
import numpy as np
import pandas as pd
from flask import Flask, request, jsonify, render_template
from gensim.models import Doc2Vec
from sklearn import utils
from sklearn.model_selection import train_test_split
from gensim.models.doc2vec import TaggedDocument, Doc2Vec
import numpy as np
import pandas as pd
import re
#nltk.download('punkt')
#nltk.download('wordnet')
from datetime import date
from flask import Flask, request, jsonify, render_template
import pickle
#from sklearn.externals import joblib
import joblib
from numpy import array, ndarray, string_
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import joblib
#drive.mount('/content/drive')
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import regularizers
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Activation, Dense, Dropout
from tensorflow.keras.optimizers import SGD, Adam, Adadelta, RMSprop
from sentence_transformers import SentenceTransformer
#from keras.optimizers import SGD, Adam, Adadelta, RMSprop
import keras.backend as K
# Train-Test
from sklearn.model_selection import train_test_split
# Scaling data
from sklearn.preprocessing import StandardScaler
# Classification Report
from sklearn.metrics import classification_report
import os
#import pickle
def time_cal(df):
    """Return, per row, the number of days between 'date_received' and
    'date_sent_to_company'.

    Both columns are expected to hold M/D/YYYY strings (slash-separated,
    month first) — raises ValueError/IndexError on any other format.
    """
    def _parse(mdY):
        month, day, year = mdY.split("/")
        return date(int(year), int(month), int(day))

    return [
        (_parse(sent) - _parse(received)).days
        for received, sent in zip(df['date_received'], df['date_sent_to_company'])
    ]
# Filling up null values
def fill_up_missingvalues(df, column_name, fillvalue):
    """Replace NaN/None entries of *column_name* in *df* (in place) with *fillvalue*.

    Fix: the original used chained ``df[col].fillna(..., inplace=True)``,
    which is deprecated and does not reliably update *df* under pandas
    copy-on-write; assigning the filled Series back is the supported form.
    """
    df[column_name] = df[column_name].fillna(fillvalue)
# filling up new categories
def fill_up_newcategories(df, column_name, types, fillvalue):
    """Normalize *column_name* in *df* (in place): fill missing entries with
    *fillvalue* and collapse any category not present in *types* (categories
    seen at training time) to *fillvalue*.

    Fixes: chained ``fillna(inplace=True)`` (deprecated, unreliable under
    pandas copy-on-write) replaced by assignment; the manual dict-building
    loop replaced by an equivalent vectorized ``where``/``isin``.
    """
    filled = df[column_name].fillna(fillvalue)
    # Keep known categories; anything unseen at training time becomes fillvalue.
    df[column_name] = filled.where(filled.isin(list(types)), fillvalue)
#one hot encoding categorical features
def one_hotencode(df, column_name, model_label, model_onehot):
    """One-hot encode *column_name* of *df* using pre-fitted encoder models.

    *model_label* and *model_onehot* each expose ``transform`` (e.g. a fitted
    sklearn LabelEncoder / OneHotEncoder pair); returns the one-hot matrix.
    """
    labels = model_label.transform(array(df[column_name].values))
    # OneHotEncoder expects a 2-D column vector, not a flat label array.
    column_vector = labels.reshape(-1, 1)
    return model_onehot.transform(column_vector)
# text preprocessiing
def preprocessing(df, column):
    """Lightly clean each text entry of *column*: strip punctuation and other
    special characters, drop embedded newlines, and lowercase.

    Returns a plain list of cleaned strings, one per row of *df*.
    """
    return [
        # \n is whitespace, so re.sub keeps it; the replace() removes it.
        re.sub(r'[^\w\s]', '', text).replace('\n', '').lower()
        for text in df[column]
    ]
# Vectorizing text
def vectorizing_text(text, model):
    """Encode *text* (a string or list of strings) into embedding vectors
    using *model*, any object exposing an ``encode`` method (e.g. a
    SentenceTransformer)."""
    embeddings = model.encode(text)
    return embeddings
# Category vocabularies captured at training time; categories outside these
# sets are collapsed to a fallback value by fill_up_newcategories().
# NOTE(review): paths are relative — this assumes the process is started from
# the directory containing the joblib artifacts; confirm for deployment.
types_products = joblib.load('types_product_joblib')
types_of_subproducts = joblib.load('types_subproduct_joblib')
types_issue = joblib.load('types_issue_joblib')
types_crc = joblib.load('types_crc_joblib')
# Pre-fitted label-encoder / one-hot-encoder pairs for each categorical
# feature, used by one_hotencode() inside the /predict handler.
label_encoder_product = joblib.load('label_encoder_product_joblib')
integer_encoder_product = joblib.load('integer_encoder_product_joblib')
label_encoder_subproduct = joblib.load('label_encoder_subproduct_joblib')
integer_encoder_subproduct = joblib.load('integer_encoder_subproduct_joblib')
label_encoder_crc = joblib.load('label_encoder_crc_joblib')
integer_encoder_crc = joblib.load('integer_encoder_crc_joblib')
# label_encoder_issue is the target encoder (issue labels) — only used by the
# currently commented-out prediction code; integer_encoder_issue is loaded but
# presently unused.
label_encoder_issue = joblib.load('label_encoder_issue_joblib')
integer_encoder_issue = joblib.load('integer_encoder_issue_joblib')
app = Flask(__name__)
#model = pickle.load(open('model.pkl', 'rb'))
@app.route('/')
def home():
    """Serve the input form (templates/index.html)."""
    return render_template('index.html')
@app.route('/predict', methods=['POST'])
def predict():
    """Render a prediction result on the HTML GUI.

    Reads the posted form fields positionally, assembles a one-row DataFrame,
    and runs the feature-engineering pipeline (date delta, missing-value and
    unseen-category handling, one-hot encoding, sentence embedding).

    NOTE(review): the trained issue classifier is loaded but its prediction is
    still commented out — the page currently echoes the submitted
    'company_response_to_consumer' value as the "Issue".  Confirm before
    relying on this endpoint.

    Fixes vs. original: removed duplicate ``d = {}`` initialization, the
    unused ``output`` local, and dead commented scaffolding; replaced the
    index-based dict-building loop with an equivalent comprehension.
    """
    # Form fields are positional: the client must submit them in this order.
    columns = ['company_response_to_consumer', 'consumer_complaint_narrative',
               'sub_product', 'product', 'date_received', 'date_sent_to_company']
    values = list(request.form.values())
    # One-element lists so the dict becomes a single-row DataFrame; indexing
    # columns[i] deliberately raises IndexError on extra form fields.
    df = pd.DataFrame({columns[i]: [v] for i, v in enumerate(values)})
    # Days between receipt and forwarding to the company (numeric feature).
    elapsed_days = time_cal(df)
    # Fallback values mirror the ones used when the encoders were trained.
    fill_up_missingvalues(df, 'product', 'other product')
    fill_up_missingvalues(df, 'sub_product', 'other sub-product')
    fill_up_missingvalues(df, 'company_response_to_consumer', 'Closed with explanation')
    fill_up_missingvalues(df, 'consumer_complaint_narrative', 'no complain')
    fill_up_newcategories(df, 'product', types_products, 'other product')
    fill_up_newcategories(df, 'sub_product', types_of_subproducts, 'other sub-product')
    # TODO(perf): loading the sentence encoder on every request is expensive;
    # hoist to module scope once the endpoint is finalized.
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    product_onehot_encoded = one_hotencode(df, 'product', label_encoder_product, integer_encoder_product)
    subproduct_onehot_encoded = one_hotencode(df, 'sub_product', label_encoder_subproduct, integer_encoder_subproduct)
    crc_onehot_encoded = one_hotencode(df, 'company_response_to_consumer', label_encoder_crc, integer_encoder_crc)
    preprocessed_text = preprocessing(df, 'consumer_complaint_narrative')
    vectorized_text = vectorizing_text(preprocessed_text, model)
    # Model input: one-hot blocks + time delta (renumbered columns), then the
    # text embedding appended with its own column labels, as at training time.
    X = pd.concat([pd.DataFrame(product_onehot_encoded),
                   pd.DataFrame(subproduct_onehot_encoded),
                   pd.DataFrame(crc_onehot_encoded),
                   pd.DataFrame(elapsed_days)], axis=1, ignore_index=True)
    X = pd.concat([X, pd.DataFrame(vectorized_text)], axis=1)
    # NOTE(review): loaded but unused until the prediction block below is
    # re-enabled; keeps missing-model errors surfacing on this endpoint.
    model_prediction = keras.models.load_model('/home/seanpwc/mysite/compliance/test_model_prediction')
    #y_hat=model_prediction.predict(X)
    #result=[]
    #for i in range(0,len(y_hat)):
    #    result.append(str(label_encoder_issue.inverse_transform([argmax(y_hat[i, :])])))
    #return jsonify({ 'Prediction' : result})
    # Placeholder response: echoes a submitted field, not a real prediction.
    return render_template('index.html', Predicting_Issue='The Issue is {}'.format(str(df['company_response_to_consumer'][0])))
#@app.route('/predict_api',methods = ['POST'])
#def predict_api():
# '''
# For direct API calls only
# '''
# data = request.get_json(force=True)
# prediction = model.predict([np.array(list(data.values()))])
# output = prediction[0]
# return jsonify(output)
if __name__ == "__main__":
app.run(debug=True)