storyName = "nanovirus"
chapterNumberRegex = "-\s*Chapter\s+(\d+)\s+"
chapterBodyRegex = "( -----+\s.*?)----- ASSM"
downloadFromAsstr(storyName, chapterNumberRegex, chapterBodyRegex, storyName)
return 0
sub downloadFromAsstr(search, chapterPattern, textPattern, filename)
    setfollowredirects(true)
    url = "http://assm1.asstr.org/cgi-bin/field_search.cgi?search=" & urlencode(search) & "&index=subject&submit=Search"
    status,page = get(url)
    resultPattern = "]*>[^<]*" & chapterPattern & "[^<]*[^<]*]*)>"
    numfound,links[].chapterNum,links[].url = matchall(page, resultPattern)
    if (!numfound)
        warning("No chapter links found for search: " & search)
    end
    foreach(links,link)
        link.chapterNum = integer(link.chapterNum)
    end
    links = sort(links,links[].chapterNum)
    text = ""
    foreach(links,link)
        info("Processing chapter " & link.chapterNum & "...")
        status,page = get(link.url)
        writefile(filename & link.chapterNum & ".txt", page)
        found,chapterText = match(page, textPattern)
        if (!found)
            warning("Text not found - chapter " & link.chapterNum)
            chapterText = page
        end
        text = text & chapterText
    end
    writefile(filename & ".txt", text)
end