Download or view Amazon.frink in plain text format
class Amazon
{
   // Category objects whose pages still have to be fetched.
   var queue = new array

   // Integer category IDs that have already been queued once.
   var seen = new set

   // Dictionary of <ID, Category> for every category added so far.
   var hierarchy = new dict

   // Repeatedly takes the first Category off the queue and reads its
   // bestseller page until the queue is empty.  Reading a page may add
   // further categories to the queue (see readPage / addQueue).
   processQueue[] :=
   {
      while length[queue] > 0
      {
         category = queue.popFirst[]
         categoryID = category.id
         target = "http://www.amazon.com/" + category.urlpart + "/zgbs/books/" + categoryID + "/"
         readPage[target, categoryID]
      }
   }

   // Fetches the page at url (decoded as windows-1252) and hands every
   // category link found in it to addQueue.
   //
   //   url:      address of the page to fetch
   //   parentID: ID of the category this page belongs to, or undef for the
   //             starting page
   //
   // Each regex match yields three captures: the URL path segment before
   // /zgbs/books/, the numeric category ID (converted with parseInt), and
   // the link text.
   readPage[url, parentID] :=
   {
      page = read[url, "windows-1252"]

      // Earlier, stricter pattern kept for reference:
      // %r/['"]http:\/\/www\.amazon\.com\/([^\/]+)\/zgbs\/books\/(\d+)\/[^"']*?['"]\s*>([^<]+)/g
      for [linkPart, idText, linkTitle] = page =~ %r/['"]http:\/\/www\.amazon\.com\/([^\/]+)\/zgbs\/books\/(\d+)[^'"]*['"]\s*>([^<]+)/g
         addQueue[linkPart, parseInt[idText], parentID, linkTitle]
   }

   // Adds a new category to the queue, unless it was already added or it is
   // the same category as its parent.
   //
   //   urlpart:  URL path segment used to build the category's page address
   //   id:       category ID (integer)
   //   parentID: ID of the category on whose page the link was found
   //   title:    link text for the category
   //
   // The stored title is prefixed with the parent's (already prefixed) title
   // and " | " when the parent is present in the hierarchy.  One
   // tab-separated line (id, parentID, full title) is printed per category
   // added.
   addQueue[urlpart, id, parentID, title] :=
   {
      // Ignore a link whose ID equals that of the page it was found on.
      if id == parentID
         return

      // Ignore categories that have been queued before.
      if seen.contains[id]
         return

      seen.put[id]

      // Start from the bare title; prepend the parent's title if the parent
      // is known.
      fullTitle = title
      parent = hierarchy@parentID
      if parent != undef
         fullTitle = parent.title + " | " + title

      node = new Category[urlpart, id, parentID, fullTitle]
      hierarchy@id = node
      queue.push[node]
      println["$id\t$parentID\t$fullTitle"]
   }
}
// Plain record describing one category: how to reach its page, its ID, the
// ID of the category it was found under, and its display title.
class Category
{
   // URL path segment that precedes /zgbs/books/ in the category's address
   var urlpart

   // Category ID
   var id

   // ID of the parent category (undef when there is none)
   var parentID

   // Display title of the category
   var title

   // Constructs a Category; every argument is stored unchanged in the field
   // of the corresponding meaning.
   new[pathSegment, categoryID, parentCategoryID, displayTitle] :=
   {
      title    = displayTitle
      parentID = parentCategoryID
      id       = categoryID
      urlpart  = pathSegment
   }
}
// Entry point: read the top-level books bestseller page (it has no parent
// category, so undef is passed as the parent ID), then work through every
// category that page queued.
startPage = "http://www.amazon.com/gp/bestsellers/books/ref=sv_b_2"
crawler = new Amazon
crawler.readPage[startPage, undef]
crawler.processQueue[]
// Uncomment to dump the collected <ID, Category> dictionary:
//println[crawler.hierarchy]
Download or view Amazon.frink in plain text format
This is a program written in the programming language Frink.
For more information, view the Frink
Documentation or see More Sample Frink Programs.
Alan Eliasen was born 20139 days, 6 hours, 50 minutes ago.