GoGronkh/gparser/feedparser.go

167 lines
5.6 KiB
Go

package main
import (
"encoding/xml"
"io/ioutil"
"log"
"net/url"
"path"
"strconv"
"strings"
"time"
"github.com/ChannelMeter/iso8601duration"
"github.com/PuerkitoBio/goquery"
"git.1750studios.com/gronkhDE/gogronkh/config"
"git.1750studios.com/gronkhDE/gogronkh/database"
"git.1750studios.com/gronkhDE/gogronkh/image"
"git.1750studios.com/gronkhDE/gogronkh/youtube"
)
// Minimal XML mapping of an RSS document: only the item links are decoded,
// every other element of the feed is ignored.
type (
	// RSSItem is a single <item> entry of a feed.
	RSSItem struct {
		// Link holds the text of the item's <link> element.
		Link string `xml:"link"`
	}

	// RSSFeed is the document root; Items collects every <item> found
	// directly below the <channel> element.
	RSSFeed struct {
		Items []RSSItem `xml:"channel>item"`
	}
)
// ParseRSSFeeds loads every author from the database, downloads that author's
// RSS feed ("<GronkhUrl>/zeige/<author slug>/feed") and starts one goroutine
// per feed item: UpdateEpisode when an episode with the item's slug is already
// stored, ParseFeedEpisode otherwise. The item slug is the last path segment
// of the item link.
//
// The goroutines of one author are awaited (via the package-level wg) before
// the next author is processed. Once all authors are done GenerateSitemap is
// called.
//
// A failure while fetching, reading or decoding a feed, or an unparsable item
// link, is logged and only skips that author or item; it never aborts the run.
func ParseRSSFeeds() {
	var authors []database.Author
	database.Db.Find(&authors)
	for _, AT := range authors {
		var Feed RSSFeed
		res, err := GetHTTPResource(config.C.GronkhUrl + "/zeige/" + AT.Slug.String + "/feed")
		if err != nil {
			log.Printf("ERR RS %s: Request failed (%+v)", AT.Slug.String, err)
			continue
		}
		body, err := ioutil.ReadAll(res.Body)
		res.Body.Close()
		if err != nil {
			// Printf, not Fatalf: one unreadable feed must not terminate
			// the whole process.
			log.Printf("ERR RS %s: Document failure (%+v)", AT.Slug.String, err)
			continue
		}
		if err := xml.Unmarshal(body, &Feed); err != nil {
			log.Printf("ERR RS %s: XML failure (%+v)", AT.Slug.String, err)
			continue
		}
		for _, item := range Feed.Items {
			ur, err := url.Parse(item.Link)
			if err != nil {
				// url.Parse returns a nil *URL on error; skip the item
				// instead of dereferencing it.
				log.Printf("ERR RS %s: Invalid item link %q (%+v)", AT.Slug.String, item.Link, err)
				continue
			}
			slug := path.Base(ur.Path)
			var count int
			database.Db.Model(database.Episode{}).Where("slug = ?", slug).Count(&count)
			if count > 0 {
				// Known episode: refresh the stored record.
				var EP database.Episode
				database.Db.Where("slug = ?", slug).First(&EP)
				wg.Add(1)
				go UpdateEpisode(EP)
			} else {
				// Unknown episode: scrape it from its page.
				wg.Add(1)
				go ParseFeedEpisode(item.Link)
			}
		}
		wg.Wait()
	}
	log.Printf("*** FINISHED PARSING RSS FEEDS ***")
	GenerateSitemap()
}
// ParseFeedEpisode scrapes the episode page at u, completes it with metadata
// from the YouTube API and inserts it into the database as a new
// database.Episode.
//
// The episode slug is the last path segment of u; the slug of the owning
// Let's Play is the segment before it. If that Let's Play is not stored yet,
// ParseLPPage is asked to create it first. When the Let's Play carries a
// MergeID the episode is attached to the merge target and its season instead.
//
// The function calls wg.Done on return, so callers must wg.Add(1) before
// starting it. It returns nothing: every failure is logged and aborts the
// import of this episode without touching the database.
func ParseFeedEpisode(u string) {
	defer wg.Done()
	ur, err := url.Parse(u)
	if err != nil {
		// url.Parse returns a nil *URL on error; bail out before using it.
		log.Printf("ERR RS %q: Invalid episode URL (%+v)", u, err)
		return
	}
	slug := path.Base(ur.Path)
	lpSlug := path.Base(path.Dir(ur.Path))

	var EP database.Episode
	EP.Slug.String = slug

	var LP database.LetsPlay
	database.Db.Where("slug = ?", lpSlug).First(&LP)
	if LP.ID == 0 {
		// NOTE(review): wg.Add precedes a synchronous call here; presumably
		// ParseLPPage calls wg.Done itself — confirm against its definition.
		wg.Add(1)
		LP.Slug.String = lpSlug
		ParseLPPage(LP.Slug.String, &LP)
		if LP.ID == 0 {
			return
		}
	}
	if !LP.MergeID.Valid {
		EP.LetsPlayID = LP.ID
		EP.Season.Int64 = 1
	} else {
		EP.LetsPlayID = uint(LP.MergeID.Int64)
		EP.Season.Int64 = LP.MergeSeason.Int64
	}

	res, err := GetHTTPResource(u)
	if err != nil {
		log.Printf("ERR RS %s: Request failed (%+v)", slug, err)
		return
	}
	doc, err := goquery.NewDocumentFromResponse(res)
	if err != nil {
		log.Printf("ERR RS %s: Document failure (%+v)", slug, err)
		return
	}

	EP.Name.String = doc.Find("div.article > h2").First().Text()
	if EP.Name.String == "" {
		log.Printf("ERR RS %s: Episode name empty", slug)
		return
	}
	if match := episodeRegex.FindStringSubmatch(EP.Name.String); len(match) > 0 {
		// A conversion failure deliberately leaves the episode number at 0.
		num, _ := strconv.Atoi(match[1])
		EP.Episode.Int64 = int64(num)
		EP.Name.String = episodeRegex.ReplaceAllString(EP.Name.String, "")
		EP.Name.String = removeRegex.ReplaceAllString(EP.Name.String, "")
	} else {
		log.Printf("WAR RSS %s: Name does not match RegEx", slug)
		EP.Episode.Int64 = 0
		EP.Name.String = removeRegex.ReplaceAllString(EP.Name.String, "")
	}

	// The description is the text of all article paragraphs, one per line.
	doc.Find(".article > p").Each(func(i int, s *goquery.Selection) {
		EP.Descr.String += s.Text() + "\n"
	})
	EP.Descr.String = strings.Trim(EP.Descr.String, "\n ")
	EP.AuthorID = LP.AuthorID

	// The video ID is taken from the embedded player URL: either its last
	// path segment, or the "v" query parameter for ".../watch?v=<id>" links.
	yt, ok := doc.Find(".youtube > iframe").Attr("src")
	if !ok {
		log.Printf("ERR RS %s: No YouTube iframe found", slug)
		return
	}
	ytpath, err := url.Parse(yt)
	if err != nil {
		log.Printf("ERR RS %s: Invalid YouTube URL %q (%+v)", slug, yt, err)
		return
	}
	EP.Youtube.String = path.Base(ytpath.Path)
	if EP.Youtube.String == "watch" {
		EP.Youtube.String = ytpath.Query().Get("v")
		// Drop anything after a stray second "?" inside the parameter value.
		if idx := strings.LastIndex(EP.Youtube.String, "?"); idx != -1 {
			EP.Youtube.String = EP.Youtube.String[:idx]
		}
	}

	ytres, err := youtube.GetVideos([]string{"snippet", "statistics", "status", "contentDetails"}, []string{EP.Youtube.String}, config.C.YoutubeKey)
	if err != nil {
		log.Printf("ERR RS %s: Video %s lookup failed (%+v)", slug, EP.Youtube.String, err)
		return
	}
	if len(ytres.Items) == 0 || ytres.Items[0].Status.UploadStatus != "processed" {
		log.Printf("ERR RS %s: Video %s is private", slug, EP.Youtube.String)
		return
	}
	if EP.Descr.String == "" {
		// Fall back to the YouTube description when the page has none.
		EP.Descr.String = ytres.Items[0].Snippet.Description
	}

	// Prefer the "maxres" thumbnail and fall back to "high". A resize error
	// is only a warning; the episode is still stored.
	if thumb, ok := ytres.Items[0].Snippet.Thumbnails["maxres"]; ok {
		EP.ThumbS.String, EP.ThumbB.String, err = image.ResizeThumb(thumb.Url)
	} else if thumb, ok := ytres.Items[0].Snippet.Thumbnails["high"]; ok {
		EP.ThumbS.String, EP.ThumbB.String, err = image.ResizeThumb(thumb.Url)
	}
	if err != nil {
		log.Printf("WAR EP %s: Error resizing thumbnail: %+v", slug, err)
	}

	EP.Aired, err = time.Parse(time.RFC3339Nano, ytres.Items[0].Snippet.PublishedAt)
	if err != nil {
		log.Printf("ERR RS %s: Could not parse aired", slug)
		return
	}
	dur, err := duration.FromString(ytres.Items[0].ContentDetails.Duration)
	if err == nil {
		EP.Duration.Int64 = int64(dur.ToDuration().Seconds())
	} else {
		log.Printf("WAR RSS %s: Could not parse duration", slug)
		EP.Duration.Int64 = 0
	}
	EP.Rating.Float64, EP.Votes.Int64 = youtube.GetRatingAndVotesWithRes(ytres)

	if err := database.Db.Create(&EP).Error; err != nil {
		log.Printf("ERR RS %s: Could not be added to databse (%+v)", slug, err)
	}
}