# utils.py — helpers for loading 2-D labeled point data and plotting
# the data splits and an SVM-style classifier's decision function.
  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3. data_colors = [(1, 0, 0), (0, 0, 1)]
  4. def read_points_file(filename):
  5. pts = []
  6. with open(filename, "r") as f:
  7. for pt in f:
  8. pt = pt.strip("\n").split()
  9. pts.append([float(pt[0]), float(pt[1])])
  10. return pts
  11. def read_data(class_0_file, class_1_file):
  12. pts_0 = read_points_file(class_0_file)
  13. pts_1 = read_points_file(class_1_file)
  14. x = pts_0 + pts_1
  15. labels = [0] * len(pts_0) + [1] * len(pts_1)
  16. x = np.array(x)
  17. return (x, labels)
  18. def plot_data(X_train, y_train, X_test, y_test):
  19. X = np.concatenate((X_train, X_test))
  20. y = np.concatenate((y_train, y_test))
  21. colors = get_colors(y)
  22. colors_train = get_colors(y_train)
  23. colors_test = get_colors(y_test)
  24. plt.figure(figsize=(12, 4), dpi=150)
  25. # Plot all data plot
  26. plt.subplot(131)
  27. plt.axis('equal')
  28. plt.scatter(X[:, 0], X[:, 1], c = colors, s = 10, edgecolors=colors)
  29. plt.title("Data (100%)")
  30. # training data plot
  31. plt.subplot(132)
  32. plt.axis('equal')
  33. #plt.axis('off')
  34. plt.scatter(X_train[:, 0], X_train[:, 1], c = colors_train, s = 10, edgecolors=colors_train)
  35. plt.title("Training Data (80%)")
  36. # testing data plot
  37. plt.subplot(133)
  38. plt.axis('equal')
  39. #plt.axis('off')
  40. plt.scatter(X_test[:, 0], X_test[:, 1], c = colors_test, s = 10, edgecolors=colors_test)
  41. plt.title("Test Data (20%)")
  42. plt.tight_layout()
  43. plt.show()
  44. def get_colors(y):
  45. return [data_colors[item] for item in y]
  46. def plot_decision_function(X_train, y_train, X_test, y_test, clf):
  47. plt.figure(figsize=(8, 4), dpi=150)
  48. plt.subplot(121)
  49. plt.title("Training data")
  50. plot_decision_function_helper(X_train, y_train, clf)
  51. plt.subplot(122)
  52. plt.title("Test data")
  53. plot_decision_function_helper(X_test, y_test, clf, True)
  54. plt.show()
  55. def plot_decision_function_helper(X, y, clf, show_only_decision_function = False):
  56. colors = get_colors(y)
  57. plt.axis('equal')
  58. plt.tight_layout()
  59. #plt.axis('off')
  60. plt.scatter(X[:, 0], X[:, 1], c = colors, s = 10, edgecolors=colors)
  61. ax = plt.gca()
  62. xlim = ax.get_xlim()
  63. ylim = ax.get_ylim()
  64. # Create grid to evaluate model
  65. xx = np.linspace(xlim[0], xlim[1], 30)
  66. yy = np.linspace(ylim[0], ylim[1], 30)
  67. YY, XX = np.meshgrid(yy, xx)
  68. xy = np.vstack([XX.ravel(), YY.ravel()]).T
  69. Z = clf.decision_function(xy).reshape(XX.shape)
  70. if show_only_decision_function:
  71. # Plot decision boundary
  72. ax.contour(XX, YY, Z, colors='k', levels=[0], alpha=0.5,
  73. linestyles=['-'])
  74. else :
  75. # Plot decision boundary and margins
  76. ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
  77. linestyles=['--', '-', '--'])
  78. # Plot support vectors
  79. #ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s = 10,
  80. # linewidth=1, facecolors='k', c = 'k', label='Support Vectors')
  81. #plt.legend(fontsize='small')