Medium String String

Generate N Grams

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def generate_ngrams(s, n):
    # Convert to lowercases
    s = s.lower()

    # Replace all none alphanumeric characters with spaces
    s = re.sub(r"[^a-zA-Z0-9\s]", " ", s)

    # Break sentence in the token, remove empty tokens
    tokens = [token for token in s.split(" ") if token != ""]

    n_grams = []

    for i in range(len(tokens)):
        n_gram = " ".join(tokens[i : i + n])
        n_grams.append(n_gram)

    return n_grams

🎰