Download or view Amazon.frink in plain text format
// Crawls Amazon book best-seller category pages.  Every category link found
// on a fetched page is recorded, printed as a tab-separated line
// (id, parent id, full title) and queued so that its own page is fetched too.
class Amazon
{
   // Category objects whose pages still have to be fetched.  New entries are
   // pushed on the end and taken from the front.
   var queue = new array

   // Integer category ids that have already been queued; used to make sure
   // each category is handled only once.
   var seen = new set

   // Dictionary of <category id, Category> for everything found so far.
   var hierarchy = new dict

   // Fetches the page of each queued category until the queue is empty.
   // Reading a page may append more categories to the queue.
   processQueue[] :=
   {
      while length[queue] != 0
      {
         category = queue.popFirst[]
         pageURL = "http://www.amazon.com/" + category.urlpart + "/zgbs/books/" + category.id + "/"
         readPage[pageURL, category.id]
      }
   }

   // Downloads url (decoded as windows-1252) and hands every best-seller
   // category link found in it to addQueue, with parentID as the parent.
   //
   // The pattern captures, for each quoted link followed by its link text:
   //   1. the URL path segment before /zgbs/books/
   //   2. the numeric category id after /zgbs/books/
   //   3. the link text up to the next "<"
   readPage[url, parentID] :=
   {
      page = read[url, "windows-1252"]

      for [slug, digits, label] = page =~ %r/['"]http:\/\/www\.amazon\.com\/([^\/]+)\/zgbs\/books\/(\d+)[^'"]*['"]\s*>([^<]+)/g
         addQueue[slug, parseInt[digits], parentID, label]
   }

   // Registers a newly-found category and queues it for fetching.
   //
   //   urlpart:  URL path segment used to rebuild the category's page URL
   //   id:       integer category id
   //   parentID: id of the category on whose page this link was found
   //             (undef for the starting page)
   //   title:    link text of the category
   //
   // Does nothing if the category links to itself or was already queued.
   // Otherwise prints "id<TAB>parentID<TAB>full title", where the full title
   // is the parent's full title and this title joined by " | ".
   addQueue[urlpart, id, parentID, title] :=
   {
      // A page links to its own category; that is not a child.
      if id == parentID
         return

      if seen.contains[id]
         return

      seen.put[id]

      // Prefix the title with the parent's (already prefixed) title, if the
      // parent is known.
      fullTitle = title
      ancestor = hierarchy@parentID
      if ancestor != undef
         fullTitle = ancestor.title + " | " + title

      entry = new Category[urlpart, id, parentID, fullTitle]
      println["$id\t$parentID\t$fullTitle"]

      queue.push[entry]
      hierarchy@id = entry
   }
}
// Plain record describing one best-seller category.
class Category
{
   // URL path segment that precedes /zgbs/books/ in the category's page URL.
   var urlpart

   // Category id.
   var id

   // Id of the category this one was found under.
   var parentID

   // Title stored for the category.
   var title

   // Builds a Category from its URL segment, id, parent id and title,
   // given in that order.
   new[urlSegment, categoryID, parentCategoryID, categoryTitle] :=
   {
      id       = categoryID
      parentID = parentCategoryID
      urlpart  = urlSegment
      title    = categoryTitle
   }
}
// Driver: seed the crawl from the top-level books best-seller page (which
// has no parent category), then follow every category that was queued.
crawler = new Amazon
crawler.readPage["http://www.amazon.com/gp/bestsellers/books/ref=sv_b_2", undef]
crawler.processQueue[]

// Uncomment to dump the collected <id, Category> dictionary.
//println[crawler.hierarchy]
Download or view Amazon.frink in plain text format
This is a program written in the programming language Frink.
For more information, view the Frink
Documentation or see More Sample Frink Programs.
Alan Eliasen was born 20302 days, 23 hours, 42 minutes ago.