Commit 3f964bf5 authored by mjennewine's avatar mjennewine
Browse files

show predictions for each model

parent 9aaa69d2
Loading
Loading
Loading
Loading
+6 −5
Original line number Diff line number Diff line
@@ -200,10 +200,10 @@ def makePrediction(message):
    
    #predicted label
    pred_y = []
    pred_y.append(pla.predict([preprocess.extract(message)])[0])
    pred_y.append(sgd.predict([preprocess.extract(message)])[0])
    pred_y.append(nn.predict([preprocess.extract(message)])[0])
    pred_y.append(tree.predict([preprocess.extract(message)])[0])
    pred_y.append('spam' if pla.predict([preprocess.extract(message)])[0] == 1 else 'ham')
    pred_y.append('spam' if sgd.predict([preprocess.extract(message)])[0] == 1 else 'ham')
    pred_y.append('spam' if nn.predict([preprocess.extract(message)])[0] == 1 else 'ham')
    pred_y.append('spam' if tree.predict([preprocess.extract(message)])[0] == 1 else 'ham')
    print(pred_y, '\n')
    
    """
@@ -216,7 +216,8 @@ def makePrediction(message):
    print(pred_y)
    """
    
    return str(pred_y.count(1)) + ' out of 4 models predict this message to be spam.'
    #return str(pred_y.count(1)) + ' out of 4 models predict this message to be spam.'
    return str(pred_y.count('spam')) + ' out of 4 models predict this message to be spam.<br><br>Perceptron: ' + pred_y[0] + '<br>Stochastic Gradient Descent: ' + pred_y[1] + '<br>Neural Network: ' + pred_y[2] + '<br>Decision Tree: ' + pred_y[3] + '<br><br>'


def getPerformanceMetrics():
+6 −6
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ from joblib import dump, load

features = []


#read in training and testing data
#kaggle and UCI contain the same data
def read_data():
@@ -30,6 +31,7 @@ def read_data():
    return data



def clean_text(text):
    #remove html tags
    clean = re.sub('<[^<]+?>', '', text)
@@ -45,8 +47,6 @@ def clean_text(text):


### create features from training data
### input - spam/ham training set
### returns a list of feature words
def define_features(data, num_features=1000):
    #create list of text and labels
    text =  data['text'].tolist()
@@ -86,8 +86,6 @@ def define_features(data, num_features=1000):


### creates a feature vector from a message
### input - list of feature words and an sms message
### returns a feature vector of 1 if feature is found in text and 0 if feature is not
def extract(message):
    global features
    
@@ -120,8 +118,6 @@ def extract(message):


### creates a feature matrix from a data set
### input - list of feature words and data set to extract features from
### returns a numpy matrix containing a feature vector for each message, also returns an array of correct labels
def prepare(data):
    #create list of text and labels
    text =  data['text'].tolist()
@@ -142,14 +138,18 @@ def prepare(data):
    return np.array(matrix), target



# for API
def loadXY(refresh_data=False):
    if not os.path.isfile('X0.joblib') or not os.path.isfile('X1.joblib') or not os.path.isfile('Y.joblib') or refresh_data:
        print('Reading data...')
        data = read_data()
        
        #extract features from training data
        print('Defining features...')
        define_features(data)
        
        print('Preprocessing data...')
        #create feature matrix for training and testing data
        X, Y = prepare(data)
        dump(X[:25000, :], 'X0.joblib')
+0 −3
Original line number Diff line number Diff line
@@ -10,7 +10,6 @@ from flask_cors import CORS
app = Flask(__name__)
CORS(app)

#import json

# Endpoint to load index.html
@app.route('/')
@@ -29,10 +28,8 @@ def get_script():
@app.route('/predict', methods=["POST"])
def predict():
    """Classify the message carried in the POSTed JSON body.

    Expects a JSON object with a string 'message' field and responds with
    {'prediction': <value returned by ml.makePrediction(message)>}.

    A body that is not JSON, is not a JSON object, or lacks a string
    'message' field gets a 400 response with an 'error' field, instead of
    raising TypeError/KeyError inside the view.
    """
    # silent=True makes get_json() return None for a missing or unparseable
    # body, so every malformed request goes through the one check below.
    # Named 'payload' rather than 'json' to avoid shadowing the json module.
    payload = request.get_json(silent=True)
    print(payload)

    message = payload.get('message') if isinstance(payload, dict) else None
    if not isinstance(message, str):
        # NOTE(review): assumes ml.makePrediction only accepts text -- confirm.
        return {'error': "Request body must be a JSON object with a string 'message' field."}, 400

    return {'prediction': ml.makePrediction(message)}

+1 −0
Original line number Diff line number Diff line
spam,Larry,
spam,free sex for drugs,