Download or view trigram.frink in plain text format
// Finds the most common n-letter patterns (n-grams) in the text of a web page.
//
// Prompts for a URL (defaulting to an HTML copy of "Innocents Abroad") and a
// pattern length, fetches the page, reduces it to lowercase letters separated
// by single spaces, and prints the [pattern, count] pairs returned by
// getGrams, one per line.
default = "https://futureboy.us/twain/innocents/innocents.html"
url = input["Enter URL [Innocents Abroad HTML]: ", default]

// NOTE(review): eval[] evaluates whatever expression the user types, not just
// a number.  That is acceptable for an interactive script run by its own
// user, but do not feed it input from an untrusted source.
len = eval[input["Enter character length: "]]

file = read[url]
file =~ %s/<[^>]*>//gs          // Strip HTML tags.
// Replace HTML entities (&nbsp; &amp; &#8217; ...) with a space.  Without this
// step the filter below removes only the "&" and ";", so the entity names
// ("nbsp", "amp") would be counted as if they were words in the text.
file =~ %s/&#?[a-z0-9]+;/ /gsi
file =~ %s/[^a-z\s]//gsi        // Remove everything but letters and whitespace.
file =~ %s/[\r\n]/ /gs          // Turn line breaks into spaces.
file =~ %s/\s+/ /gs             // Collapse runs of whitespace into one space.
file = lc[file]                 // Count "The" and "the" as the same pattern.

print[joinln[getGrams[file, len]]]
// Counts every substring of exactly len characters in str.
//
// Arguments:
//   str: the text to scan.
//   len: the pattern length in characters.
//
// Returns an array of [pattern, count] pairs sorted by count (column 1), so
// the most common patterns come last.  Returns an empty array when len is
// less than 1 or longer than str, since no pattern of that length exists.
getGrams[str, len] :=
{
   fileLen = length[str]

   // Guard against degenerate lengths: a zero length would count the empty
   // string at every position, and a negative length would push the loop's
   // upper bound past the end of the string.
   if ((len < 1) || (len > fileLen))
      return []

   grams = new dict

   // The last full-length pattern starts at index fileLen - len (inclusive).
   for i = 0 to (fileLen - len)
   {
      sub = substrLen[str, i, len]

      // Patterns that span a word boundary (i.e. contain a space) are counted
      // by default.  Uncomment the next two lines to skip them.
      // if (sub =~ %r/\s/)
      //    next

      grams.increment[sub, 1]
   }

   return sort[array[grams], byColumn[1]]
}
Download or view trigram.frink in plain text format
This is a program written in the programming language Frink.
For more information, view the Frink
Documentation or see More Sample Frink Programs.
Alan Eliasen was born 20145 days, 6 hours, 1 minute ago.