GoGronkh/gparser/feedparser.go
2015-10-02 15:23:35 +02:00

170 lines
5.7 KiB
Go

package main
import (
"encoding/xml"
"io/ioutil"
"log"
"net/url"
"path"
"strconv"
"strings"
"time"
"github.com/ChannelMeter/iso8601duration"
"github.com/PuerkitoBio/goquery"
"github.com/cheggaaa/pb"
"git.1750studios.com/gronkhDE/gogronkh/config"
"git.1750studios.com/gronkhDE/gogronkh/database"
"git.1750studios.com/gronkhDE/gogronkh/image"
"git.1750studios.com/gronkhDE/gogronkh/youtube"
)
// RSSItem is a single <item> entry in an author's RSS feed. Only the
// article link is decoded; all other episode data is scraped from the
// linked page itself.
type RSSItem struct {
Link string `xml:"link"`
}
// RSSFeed is the minimal shape of an author feed document: just the
// list of items under <channel><item>.
type RSSFeed struct {
Items []RSSItem `xml:"channel>item"`
}
// ParseRSSFeeds fetches the RSS feed of every author in the database.
// Episodes already present are refreshed via UpdateEpisode; unknown
// episodes are parsed from their article page via ParseFeedEpisode.
// Both run as goroutines tracked by the package-level WaitGroup wg.
// After all feeds have been processed the sitemap is regenerated.
func ParseRSSFeeds() {
	var authors []database.Author
	database.Db.Find(&authors)
	for _, AT := range authors {
		var Feed RSSFeed
		res, err := GetHTTPResource(config.C.GronkhUrl + "/zeige/" + AT.Slug.String + "/feed")
		if err != nil {
			log.Printf("ERR RS %s: Request failed (%+v)", AT.Slug, err)
			continue
		}
		body, err := ioutil.ReadAll(res.Body)
		// Close immediately instead of defer: a defer inside this loop
		// would keep every response body open until the function returns.
		res.Body.Close()
		if err != nil {
			// Printf, not Fatalf: Fatalf exits the process, which made the
			// old continue unreachable — one broken feed must not abort
			// the whole run.
			log.Printf("ERR RS %s: Document failure (%+v)", AT.Slug, err)
			continue
		}
		if xerr := xml.Unmarshal(body, &Feed); xerr != nil {
			// Log the actual unmarshal error (the old code logged the
			// stale err from ReadAll here).
			log.Printf("ERR RS %s: XML failure (%+v)", AT.Slug, xerr)
			continue
		}
		bar := pb.StartNew(len(Feed.Items))
		for _, item := range Feed.Items {
			ur, _ := url.Parse(item.Link)
			slug := path.Base(ur.Path)
			var count int
			if database.Db.Model(database.Episode{}).Where("slug = ?", slug).Count(&count); count > 0 {
				// Episode already known: refresh its metadata.
				var EP database.Episode
				database.Db.Where("slug = ?", slug).First(&EP)
				wg.Add(1)
				go UpdateEpisode(EP, bar)
			} else {
				// New episode: scrape it from the article page.
				wg.Add(1)
				go ParseFeedEpisode(item.Link, bar)
			}
		}
		wg.Wait()
		bar.FinishPrint("Parsed RSS feed for " + AT.Slug.String)
	}
	GenerateSitemap()
}
// ParseFeedEpisode scrapes a single episode from its article page at u,
// enriches it with YouTube metadata (description fallback, thumbnails,
// air date, duration, rating) and stores it in the database.
//
// It must be run as a goroutine after wg.Add(1); wg.Done is deferred.
// bar may be nil; if not, it is incremented exactly once on return.
func ParseFeedEpisode(u string, bar *pb.ProgressBar) {
	defer wg.Done()
	if bar != nil {
		defer bar.Increment()
	}
	// The episode slug is the last path element of the article URL; the
	// parent directory is the Let's Play slug.
	ur, _ := url.Parse(u)
	slug := path.Base(ur.Path)
	var EP database.Episode
	EP.Slug.String = slug
	var LP database.LetsPlay
	database.Db.Where("slug = ?", path.Base(path.Dir(ur.Path))).First(&LP)
	EP.LetsPlayID = LP.ID
	res, err := GetHTTPResource(u)
	if err != nil {
		log.Printf("ERR RS %s: Request failed (%+v)", slug, err)
		return
	}
	doc, err := goquery.NewDocumentFromResponse(res)
	if err != nil {
		log.Printf("ERR RS %s: Document failure (%+v)", slug, err)
		return
	}
	EP.Name.String = doc.Find("div.article > h2").First().Text()
	if EP.Name.String == "" {
		log.Printf("ERR RS %s: Episode name empty", slug)
		return
	}
	// Try to extract the episode number from the name; fall back to 0.
	if match := episodeRegex.FindStringSubmatch(EP.Name.String); len(match) > 0 {
		num, _ := strconv.Atoi(match[1])
		EP.Episode.Int64 = int64(num)
	} else {
		log.Printf("SUC RSS %s: Name does not match RegEx", slug)
		EP.Episode.Int64 = 0
	}
	// Description: concatenate all article paragraphs, newline-separated.
	doc.Find(".article > p").Each(func(i int, s *goquery.Selection) {
		EP.Descr.String += s.Text() + "\n"
	})
	EP.Descr.String = strings.Trim(EP.Descr.String, "\n ")
	// Resolve the author from the avatar link's last path element.
	au, _ := doc.Find(".author > a.avatar").Attr("href")
	au = strings.TrimSuffix(au, "/")
	aur, _ := url.Parse(au)
	if path.Base(aur.Path) == "" || path.Base(aur.Path) == "." {
		log.Printf("ERR EP %s: No author found", slug)
		return
	}
	if path.Base(aur.Path) == "tobinator612" { // Don't ask… just… don't ask
		EP.AuthorID = LP.AuthorID
	} else {
		var AT database.Author
		database.Db.Where("slug = ?", path.Base(aur.Path)).First(&AT)
		EP.AuthorID = AT.ID
	}
	// The YouTube ID is normally the last path element of the embed URL;
	// for ".../watch?v=ID" style URLs it is the v query parameter.
	yt, _ := doc.Find(".youtube > iframe").Attr("src")
	ytpath, _ := url.Parse(yt)
	EP.Youtube.String = path.Base(ytpath.Path)
	if EP.Youtube.String == "watch" {
		EP.Youtube.String = ytpath.Query().Get("v")
		if idx := strings.LastIndex(EP.Youtube.String, "?"); idx != -1 {
			EP.Youtube.String = EP.Youtube.String[:idx]
		}
	}
	ytres, err := youtube.GetVideos([]string{"snippet", "statistics", "status", "contentDetails"}, []string{EP.Youtube.String}, config.C.YoutubeKey)
	if err != nil || len(ytres.Items) == 0 || ytres.Items[0].Status.UploadStatus != "processed" {
		// %v, not %s: err may be nil here (e.g. video simply not processed),
		// and %s on a nil error prints "%!s(<nil>)".
		log.Printf("WAR RS %s: Video %s is private (%v)", slug, EP.Youtube.String, err)
		return
	}
	if EP.Descr.String == "" {
		// Fall back to the YouTube description when the article had none.
		EP.Descr.String = ytres.Items[0].Snippet.Description
	}
	// Prefer the maxres thumbnail, fall back to high quality.
	if thumb, ok := ytres.Items[0].Snippet.Thumbnails["maxres"]; ok {
		EP.ThumbS.String, EP.ThumbB.String, err = image.ResizeThumb(thumb.Url)
	} else if thumb, ok := ytres.Items[0].Snippet.Thumbnails["high"]; ok {
		EP.ThumbS.String, EP.ThumbB.String, err = image.ResizeThumb(thumb.Url)
	}
	if err != nil {
		// Non-fatal: the episode is stored without resized thumbnails.
		log.Printf("WAR EP %s: Error resizing thumbnail: %+v", slug, err)
	}
	EP.Aired, err = time.Parse(time.RFC3339Nano, ytres.Items[0].Snippet.PublishedAt)
	if err != nil {
		log.Printf("ERR RS %s: Could not parse aired", slug)
		return
	}
	// ContentDetails.Duration is an ISO 8601 duration (e.g. "PT1H2M3S").
	dur, err := duration.FromString(ytres.Items[0].ContentDetails.Duration)
	if err == nil {
		EP.Duration.Int64 = int64(dur.ToDuration().Seconds())
	} else {
		log.Printf("SUC RSS %s: Could not parse duration", slug)
		EP.Duration.Int64 = 0
	}
	EP.Rating.Float64, EP.Votes.Int64 = youtube.GetRatingAndVotesWithRes(ytres)
	if err := database.Db.Create(&EP).Error; err != nil {
		log.Printf("ERR RS %s: Could not be added to database (%+v)", slug, err)
	} else {
		log.Printf("SUC RSS %s: Added to database", slug)
	}
}