import numpy as np
import pandas as pd
# Load the tab-separated file into a DataFrame.
# NOTE(review): presumably the SMS spam collection data set, judging by the
# file name -- confirm against the data source.
df = pd.read_csv(
    filepath_or_buffer='./smsspamcollection.tsv',
    sep='\t',
)

# Notebook-style inspection: the results of these two expressions are not
# stored anywhere, so they are only visible in an interactive session.
df.head(n=5)  # first five rows (5 is also the default)
df['label'].value_counts()  # number of rows per distinct label value

# Split the data set.
# The import stays here, next to the step it belongs to.
from sklearn.model_selection import train_test_split

# Select the two columns used below.
# NOTE(review): presumably X holds the features and y the target for
# train_test_split -- confirm against the code that follows.
X, y = df['message'], df['label']
df.[......]