# Source code for pyconversations.tokenizers.nltk
import nltk
from .base import BaseTokenizer
class NLTKTokenizer(BaseTokenizer):
    """
    An NLTK-based tokenizer

    Tokenization is delegated to ``nltk.word_tokenize``. The label ``'NLTK'``
    is passed to the ``BaseTokenizer`` constructor.
    """

    def __init__(self):
        # Hand the fixed label 'NLTK' to the base-class constructor.
        super(NLTKTokenizer, self).__init__('NLTK')

    def tokenize(self, s):
        """
        Splits a string into tokens.

        Parameters
        ----------
        s : str
            The string to tokenize

        Returns
        -------
        list(str)
            A list of tokens
        """
        # Only the text is passed, so NLTK's own defaults apply for every
        # other option of word_tokenize.
        return nltk.word_tokenize(s)