import spacy
from spacy.tokenizer import Tokenizer

# Load the small English pipeline; we only need its vocab to build a
# whitespace-based Tokenizer (no tagger/parser is run by tokenizer.pipe).
nlp = spacy.load("en_core_web_sm")
tokenizer = Tokenizer(nlp.vocab)

# Tokenize every document in df['text'] in a streaming batch.
# NOTE: `token.is_space` correctly drops ALL whitespace tokens (runs of
# spaces, tabs, newlines), whereas comparing against the literal ' ' only
# filtered tokens that were exactly one single space.
# assumes df is a pandas DataFrame with a string column 'text' — TODO confirm
tokens = [
    [token.text for token in doc if not token.is_space]
    for doc in tokenizer.pipe(df['text'])
]
df['tokens'] = tokens
df.head()