Loading ml.py +104 −77 Original line number Diff line number Diff line Loading @@ -21,52 +21,35 @@ sgd = None nn = None tree = None def getPerformanceMetrics(): def preprocessing(): global X, Y XYList = preprocess.loadXY() X = XYList[0] Y = XYList[1] def testModels(): if len(X) == 0: preprocessing() if tree == None: loadModels() X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0) pla_yPred = pla.predict(X_test) sgd_yPred = sgd.predict(X_test) nn_yPred = nn.predict(X_test) tree_yPred = tree.predict(X_test) accuracy = [accuracy_score(Y_test, pla_yPred), accuracy_score(Y_test, sgd_yPred), accuracy_score(Y_test, nn_yPred), accuracy_score(Y_test, tree_yPred)] precision = [precision_score(Y_test, pla_yPred, average='macro'), precision_score(Y_test, sgd_yPred, average='macro'), precision_score(Y_test, nn_yPred, average='macro'), precision_score(Y_test, tree_yPred, average='macro')] recall = [recall_score(Y_test, pla_yPred, average='macro'), recall_score(Y_test, sgd_yPred, average='macro'), recall_score(Y_test, nn_yPred, average='macro'), recall_score(Y_test, tree_yPred, average='macro')] f1 = [recall_score(Y_test, pla_yPred, average='macro'), recall_score(Y_test, sgd_yPred, average='macro'), recall_score(Y_test, nn_yPred, average='macro'), recall_score(Y_test, tree_yPred, average='macro')] pred_y = pla.predict(X) print('Perceptron (all data):') printMetrics(Y, pred_y) # accuracy = accuracy_score(y_actual, y_pred) # precision = precision_score(y_actual, y_pred, average='macro') # recall = recall_score(y_actual, y_pred, average='macro') # f1 = f1_score(y_actual, y_pred, average='macro') pred_y = sgd.predict(X) print('\nStochastic Gradient Descent (all data):') printMetrics(Y, pred_y) return accuracy, precision, recall, f1 pred_y = nn.predict(X) print('\nNeural Network (all data):') printMetrics(Y, pred_y) def printMetrics(y_actual, y_pred): print('Accuracy: ', accuracy_score(y_actual, y_pred)) print('Precision:', precision_score(y_actual, y_pred, average='macro')) print('Recall: ', recall_score(y_actual, y_pred, average='macro')) print('F1: ', f1_score(y_actual, y_pred, average='macro')) pred_y = tree.predict(X) print('\nDecision Tree (all data):') printMetrics(Y, pred_y) def binarize(value): Loading @@ -84,11 +67,11 @@ def binarize_list(data): return result def preprocessing(): global X, Y XYList = preprocess.loadXY() X = XYList[0] Y = XYList[1] def printMetrics(y_actual, y_pred): print('Accuracy: ', accuracy_score(y_actual, y_pred)) print('Precision:', precision_score(y_actual, y_pred, average='macro')) print('Recall: ', recall_score(y_actual, y_pred, average='macro')) print('F1: ', f1_score(y_actual, y_pred, average='macro')) def trainModels(): Loading Loading @@ -120,7 +103,8 @@ def trainModels(): print('\nPerceptron (testing data):') printMetrics(Y_test, pred_y_test) dump(pla.best_estimator_, 'pla.joblib') pla = pla.best_estimator_ dump(pla, 'pla.joblib') print('==================================================\n\n') Loading @@ -146,7 +130,8 @@ def trainModels(): print('\nSGD (testing data):') printMetrics(Y_test, pred_y_test) dump(sgd.best_estimator_, 'sgd.joblib') sgd = sgd.best_estimator_ dump(sgd, 'sgd.joblib') print('==================================================\n\n') Loading @@ -172,7 +157,8 @@ def trainModels(): print('\nNeural Network (testing data):') printMetrics(Y_test, pred_y_test) dump(nn.best_estimator_, 'nn.joblib') nn = nn.best_estimator_ dump(nn, 'nn.joblib') print('==================================================\n\n') Loading Loading @@ -208,30 +194,6 @@ def loadModels(): tree = load('tree.joblib') def testModels(): if len(X) == 0: preprocessing() if tree == None: loadModels() pred_y = pla.predict(X) print('Perceptron (all data):') printMetrics(Y, pred_y) pred_y = sgd.predict(X) print('\nStochastic Gradient Descent (all data):') printMetrics(Y, pred_y) pred_y = nn.predict(X) print('\nNeural Network (all data):') printMetrics(Y, pred_y) pred_y = tree.predict(X) print('\nDecision Tree (all data):') printMetrics(Y, pred_y) #### not finished def makePrediction(message): if tree == None: loadModels() Loading @@ -257,19 +219,84 @@ def makePrediction(message): return str(pred_y.count(1)) + ' out of 4 models predict this message to be spam.' # declaring, training, fitting each algorithm def mlinit(): pass def getPerformanceMetrics(): if len(X) == 0: preprocessing() if tree == None: loadModels() X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0) pla_yPred = pla.predict(X_test) sgd_yPred = sgd.predict(X_test) nn_yPred = nn.predict(X_test) tree_yPred = tree.predict(X_test) accuracy = [accuracy_score(Y_test, pla_yPred), accuracy_score(Y_test, sgd_yPred), accuracy_score(Y_test, nn_yPred), accuracy_score(Y_test, tree_yPred)] precision = [precision_score(Y_test, pla_yPred, average='macro'), precision_score(Y_test, sgd_yPred, average='macro'), precision_score(Y_test, nn_yPred, average='macro'), precision_score(Y_test, tree_yPred, average='macro')] recall = [recall_score(Y_test, pla_yPred, average='macro'), recall_score(Y_test, sgd_yPred, average='macro'), recall_score(Y_test, nn_yPred, average='macro'), recall_score(Y_test, tree_yPred, average='macro')] f1 = [f1_score(Y_test, pla_yPred, average='macro'), f1_score(Y_test, sgd_yPred, average='macro'), f1_score(Y_test, nn_yPred, average='macro'), f1_score(Y_test, tree_yPred, average='macro')] # accuracy = accuracy_score(y_actual, y_pred) # precision = precision_score(y_actual, y_pred, average='macro') # recall = recall_score(y_actual, y_pred, average='macro') # f1 = f1_score(y_actual, y_pred, average='macro') return accuracy, precision, recall, f1 # partial fit the new prediction data to each algorithm def partialFitNewData(message, label): pass before = getPerformanceMetrics() global pla, sgd, nn, tree x_new = [preprocess.extract(message)] y_new = [0 if label == 'ham' else 1] print('Updating Perceptron Model...') pla.partial_fit(x_new, y_new) #dump(pla, 'pla.joblib') print('Updating Stochastic Gradient Descent Model...') sgd.partial_fit(x_new, y_new) #dump(sgd, 'sgd.joblib') print('Updating Neural Network...') nn.partial_fit(x_new, y_new) #dump(nn, 'nn.joblib') print('Retraining Decision Tree...') X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0) tree = DecisionTreeClassifier() tree.fit(np.concatenate((X_train, x_new), axis=0), np.concatenate((Y_train, y_new), axis=0)) #dump(tree, 'tree.joblib') after = getPerformanceMetrics() print(np.subtract(after, before)) return 'Models updated successfully' def switchCurrentAlgorithm(): pass # def getPerformanceMetrics(): # pass def getSampleMessages(): pass # declaring, training, fitting each algorithm def mlinit(): preprocessing() loadModels() No newline at end of file server.py +0 −3 Original line number Diff line number Diff line Loading @@ -59,7 +59,6 @@ def getTopAlgorithms(): name = ["Perceptron", "Stochastic Gradient Descent", "Neural Network", "Decision Tree"] # accuracy, precision, recall, f1 = ml.getPerformanceMetrics() result = ml.getPerformanceMetrics() print(result) accuracy = result[0] precision = result[1] recall = result[2] Loading @@ -84,7 +83,6 @@ def getTopAlgorithms(): "accuracy": accuracy } return val # TODO make this get the actual top feature words Loading @@ -101,5 +99,4 @@ def getTopFiveWords(): # Run the app! if __name__ == '__main__': app.run(port = 8080) ml.preprocessing() ml.mlinit() No newline at end of file Loading
ml.py +104 −77 Original line number Diff line number Diff line Loading @@ -21,52 +21,35 @@ sgd = None nn = None tree = None def getPerformanceMetrics(): def preprocessing(): global X, Y XYList = preprocess.loadXY() X = XYList[0] Y = XYList[1] def testModels(): if len(X) == 0: preprocessing() if tree == None: loadModels() X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0) pla_yPred = pla.predict(X_test) sgd_yPred = sgd.predict(X_test) nn_yPred = nn.predict(X_test) tree_yPred = tree.predict(X_test) accuracy = [accuracy_score(Y_test, pla_yPred), accuracy_score(Y_test, sgd_yPred), accuracy_score(Y_test, nn_yPred), accuracy_score(Y_test, tree_yPred)] precision = [precision_score(Y_test, pla_yPred, average='macro'), precision_score(Y_test, sgd_yPred, average='macro'), precision_score(Y_test, nn_yPred, average='macro'), precision_score(Y_test, tree_yPred, average='macro')] recall = [recall_score(Y_test, pla_yPred, average='macro'), recall_score(Y_test, sgd_yPred, average='macro'), recall_score(Y_test, nn_yPred, average='macro'), recall_score(Y_test, tree_yPred, average='macro')] f1 = [recall_score(Y_test, pla_yPred, average='macro'), recall_score(Y_test, sgd_yPred, average='macro'), recall_score(Y_test, nn_yPred, average='macro'), recall_score(Y_test, tree_yPred, average='macro')] pred_y = pla.predict(X) print('Perceptron (all data):') printMetrics(Y, pred_y) # accuracy = accuracy_score(y_actual, y_pred) # precision = precision_score(y_actual, y_pred, average='macro') # recall = recall_score(y_actual, y_pred, average='macro') # f1 = f1_score(y_actual, y_pred, average='macro') pred_y = sgd.predict(X) print('\nStochastic Gradient Descent (all data):') printMetrics(Y, pred_y) return accuracy, precision, recall, f1 pred_y = nn.predict(X) print('\nNeural Network (all data):') printMetrics(Y, pred_y) def printMetrics(y_actual, y_pred): print('Accuracy: ', accuracy_score(y_actual, y_pred)) print('Precision:', precision_score(y_actual, y_pred, average='macro')) print('Recall: ', recall_score(y_actual, y_pred, average='macro')) print('F1: ', f1_score(y_actual, y_pred, average='macro')) pred_y = tree.predict(X) print('\nDecision Tree (all data):') printMetrics(Y, pred_y) def binarize(value): Loading @@ -84,11 +67,11 @@ def binarize_list(data): return result def preprocessing(): global X, Y XYList = preprocess.loadXY() X = XYList[0] Y = XYList[1] def printMetrics(y_actual, y_pred): print('Accuracy: ', accuracy_score(y_actual, y_pred)) print('Precision:', precision_score(y_actual, y_pred, average='macro')) print('Recall: ', recall_score(y_actual, y_pred, average='macro')) print('F1: ', f1_score(y_actual, y_pred, average='macro')) def trainModels(): Loading Loading @@ -120,7 +103,8 @@ def trainModels(): print('\nPerceptron (testing data):') printMetrics(Y_test, pred_y_test) dump(pla.best_estimator_, 'pla.joblib') pla = pla.best_estimator_ dump(pla, 'pla.joblib') print('==================================================\n\n') Loading @@ -146,7 +130,8 @@ def trainModels(): print('\nSGD (testing data):') printMetrics(Y_test, pred_y_test) dump(sgd.best_estimator_, 'sgd.joblib') sgd = sgd.best_estimator_ dump(sgd, 'sgd.joblib') print('==================================================\n\n') Loading @@ -172,7 +157,8 @@ def trainModels(): print('\nNeural Network (testing data):') printMetrics(Y_test, pred_y_test) dump(nn.best_estimator_, 'nn.joblib') nn = nn.best_estimator_ dump(nn, 'nn.joblib') print('==================================================\n\n') Loading Loading @@ -208,30 +194,6 @@ def loadModels(): tree = load('tree.joblib') def testModels(): if len(X) == 0: preprocessing() if tree == None: loadModels() pred_y = pla.predict(X) print('Perceptron (all data):') printMetrics(Y, pred_y) pred_y = sgd.predict(X) print('\nStochastic Gradient Descent (all data):') printMetrics(Y, pred_y) pred_y = nn.predict(X) print('\nNeural Network (all data):') printMetrics(Y, pred_y) pred_y = tree.predict(X) print('\nDecision Tree (all data):') printMetrics(Y, pred_y) #### not finished def makePrediction(message): if tree == None: loadModels() Loading @@ -257,19 +219,84 @@ def makePrediction(message): return str(pred_y.count(1)) + ' out of 4 models predict this message to be spam.' # declaring, training, fitting each algorithm def mlinit(): pass def getPerformanceMetrics(): if len(X) == 0: preprocessing() if tree == None: loadModels() X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0) pla_yPred = pla.predict(X_test) sgd_yPred = sgd.predict(X_test) nn_yPred = nn.predict(X_test) tree_yPred = tree.predict(X_test) accuracy = [accuracy_score(Y_test, pla_yPred), accuracy_score(Y_test, sgd_yPred), accuracy_score(Y_test, nn_yPred), accuracy_score(Y_test, tree_yPred)] precision = [precision_score(Y_test, pla_yPred, average='macro'), precision_score(Y_test, sgd_yPred, average='macro'), precision_score(Y_test, nn_yPred, average='macro'), precision_score(Y_test, tree_yPred, average='macro')] recall = [recall_score(Y_test, pla_yPred, average='macro'), recall_score(Y_test, sgd_yPred, average='macro'), recall_score(Y_test, nn_yPred, average='macro'), recall_score(Y_test, tree_yPred, average='macro')] f1 = [f1_score(Y_test, pla_yPred, average='macro'), f1_score(Y_test, sgd_yPred, average='macro'), f1_score(Y_test, nn_yPred, average='macro'), f1_score(Y_test, tree_yPred, average='macro')] # accuracy = accuracy_score(y_actual, y_pred) # precision = precision_score(y_actual, y_pred, average='macro') # recall = recall_score(y_actual, y_pred, average='macro') # f1 = f1_score(y_actual, y_pred, average='macro') return accuracy, precision, recall, f1 # partial fit the new prediction data to each algorithm def partialFitNewData(message, label): pass before = getPerformanceMetrics() global pla, sgd, nn, tree x_new = [preprocess.extract(message)] y_new = [0 if label == 'ham' else 1] print('Updating Perceptron Model...') pla.partial_fit(x_new, y_new) #dump(pla, 'pla.joblib') print('Updating Stochastic Gradient Descent Model...') sgd.partial_fit(x_new, y_new) #dump(sgd, 'sgd.joblib') print('Updating Neural Network...') nn.partial_fit(x_new, y_new) #dump(nn, 'nn.joblib') print('Retraining Decision Tree...') X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0) tree = DecisionTreeClassifier() tree.fit(np.concatenate((X_train, x_new), axis=0), np.concatenate((Y_train, y_new), axis=0)) #dump(tree, 'tree.joblib') after = getPerformanceMetrics() print(np.subtract(after, before)) return 'Models updated successfully' def switchCurrentAlgorithm(): pass # def getPerformanceMetrics(): # pass def getSampleMessages(): pass # declaring, training, fitting each algorithm def mlinit(): preprocessing() loadModels() No newline at end of file
server.py +0 −3 Original line number Diff line number Diff line Loading @@ -59,7 +59,6 @@ def getTopAlgorithms(): name = ["Perceptron", "Stochastic Gradient Descent", "Neural Network", "Decision Tree"] # accuracy, precision, recall, f1 = ml.getPerformanceMetrics() result = ml.getPerformanceMetrics() print(result) accuracy = result[0] precision = result[1] recall = result[2] Loading @@ -84,7 +83,6 @@ def getTopAlgorithms(): "accuracy": accuracy } return val # TODO make this get the actual top feature words Loading @@ -101,5 +99,4 @@ def getTopFiveWords(): # Run the app! if __name__ == '__main__': app.run(port = 8080) ml.preprocessing() ml.mlinit() No newline at end of file