GoGronkh/gparser/episodeparser.go

195 lines
6.4 KiB
Go

package main
import (
"log"
"net/url"
"strconv"
"strings"
"time"
"path"
"github.com/ChannelMeter/iso8601duration"
"github.com/PuerkitoBio/goquery"
"git.1750studios.com/gronkhDE/gogronkh/config"
"git.1750studios.com/gronkhDE/gogronkh/database"
"git.1750studios.com/gronkhDE/gogronkh/image"
"git.1750studios.com/gronkhDE/gogronkh/youtube"
)
// ParseEpisodes loads every Let's Play from the database and scans each one's
// episode listing, starting at page 1, for episodes that are not stored yet.
// One goroutine is started per Let's Play; the function blocks on the
// package-level wait group until all of them, and everything they spawn, have
// finished. If the Let's Plays cannot be loaded the error is logged and
// nothing is started.
func ParseEpisodes() {
	var LPs []database.LetsPlay
	if err := database.Db.Find(&LPs).Error; err != nil {
		log.Printf("ERR LP: Could not load Let's Plays (%+v)", err)
		return
	}
	for _, LP := range LPs {
		// Add before starting the goroutine so Wait cannot return early.
		wg.Add(1)
		go ParseLPEpisodes(LP.Slug.String, 1)
	}
	wg.Wait()
}
// ParseLPEpisodes fetches one page of the episode listing of the Let's Play
// identified by gslug and starts a ParseEpisode goroutine for every
// ".entry.entry-letsplay" element on it. When the page contains a ".next"
// element with an href attribute, the following page is handled by another
// ParseLPEpisodes goroutine.
//
// Each goroutine started here is registered with the package-level wait group
// beforehand. The caller must have registered this call the same way; the
// registration is released when the function returns. Request and document
// errors are logged and end the processing of this page.
func ParseLPEpisodes(gslug string, page int) {
	defer wg.Done()

	listURL := config.C.GronkhUrl + "/lets-play/" + gslug + "/page/" + strconv.Itoa(page)
	resp, err := GetHTTPResource(listURL)
	if err != nil {
		log.Printf("ERR LP %s: Request failed (%+v)", gslug, err)
		return
	}

	listing, err := goquery.NewDocumentFromResponse(resp)
	if err != nil {
		log.Printf("ERR LP %s: Document failure (%+v)", gslug, err)
		return
	}

	// One goroutine per episode entry on this page.
	listing.Find(".entry.entry-letsplay").Each(func(idx int, entry *goquery.Selection) {
		wg.Add(1)
		go ParseEpisode(idx, entry)
	})

	// Only the presence of the link matters; its target is rebuilt from page+1.
	if _, hasNext := listing.Find(".next").Attr("href"); hasNext {
		wg.Add(1)
		go ParseLPEpisodes(gslug, page+1)
	}
}
// ParseEpisode scrapes a single episode and stores it in the database.
//
// s is one episode entry of a Let's Play listing page and i is its index on
// that page (used for logging only). The caller must have registered the call
// with the package-level wait group; the registration is released on return.
//
// The episode slug is the last path segment of the entry's "h2 > a" link.
// Episodes whose slug is already stored are skipped (the lookup is Unscoped,
// so presumably soft-deleted rows count as well). Otherwise the episode page
// is fetched for name, number, description and the embedded YouTube video,
// and the YouTube API supplies thumbnails, air date, duration and rating.
// If the episode aired before the Let's Play's recorded air date, or none is
// recorded, the Let's Play is updated too.
//
// Every failure is logged. "ERR" failures abort the episode, "WAR" failures
// fall back to a zero/empty value and continue.
func ParseEpisode(i int, s *goquery.Selection) {
	defer wg.Done()

	gu, found := s.Find("h2 > a").Attr("href")
	if !found {
		log.Printf("ERR EP: Entry %d has no episode link", i)
		return
	}
	ur, err := url.Parse(gu)
	if err != nil {
		// url.Parse returns a nil URL on error; dereferencing it would panic.
		log.Printf("ERR EP %s: Invalid episode URL (%+v)", gu, err)
		return
	}
	slug := path.Base(ur.Path)

	var count int
	if err := database.Db.Model(database.Episode{}).Unscoped().Where("slug = ?", slug).Count(&count).Error; err != nil {
		log.Printf("ERR EP %s: Could not check for existing episode (%+v)", slug, err)
		return
	}
	if count > 0 {
		return
	}

	// NOTE(review): only the String/Int64/Float64 payloads of the nullable
	// fields are set below, never Valid — confirm the database package marks
	// them as valid before they are written.
	var EP database.Episode
	EP.Slug.String = slug

	// The entry's "h1 > a" link points at the Let's Play the episode belongs to.
	u, _ := s.Find("h1 > a").Attr("href")
	lpURL, err := url.Parse(u)
	if err != nil {
		log.Printf("ERR EP %s: Invalid Let's Play URL (%+v)", slug, err)
		return
	}
	var LP database.LetsPlay
	if err := database.Db.Where("slug = ?", path.Base(lpURL.Path)).First(&LP).Error; err != nil {
		// Without this check a zero-valued LP would be inserted as a new row
		// by the Save further down and the episode would get LetsPlayID 0.
		log.Printf("ERR EP %s: Let's Play lookup failed (%+v)", slug, err)
		return
	}
	if LP.MergeID.Valid {
		// A merged Let's Play files its episodes under the merge target, in
		// the configured season.
		EP.LetsPlayID = uint(LP.MergeID.Int64)
		EP.Season.Int64 = LP.MergeSeason.Int64
	} else {
		EP.LetsPlayID = LP.ID
		EP.Season.Int64 = 1
	}

	res, err := GetHTTPResource(gu)
	if err != nil {
		log.Printf("ERR EP %s: Request failed (%+v)", slug, err)
		return
	}
	doc, err := goquery.NewDocumentFromResponse(res)
	if err != nil {
		log.Printf("ERR EP %s: Document failure (%+v)", slug, err)
		return
	}

	EP.Name.String = doc.Find("div.article > h2").First().Text()
	if EP.Name.String == "" {
		log.Printf("ERR EP %s: Episode name empty", slug)
		return
	}

	// Pull the episode number out of the title and strip it from the name.
	if match := episodeRegex.FindStringSubmatch(EP.Name.String); len(match) > 1 {
		num, err := strconv.Atoi(match[1])
		if err != nil {
			log.Printf("WAR EP %s: Integer conversion not possible", slug)
			// Atoi yields a clamped non-zero value on range errors; use 0.
			num = 0
		}
		EP.Episode.Int64 = int64(num)
		EP.Name.String = episodeRegex.ReplaceAllString(EP.Name.String, "")
	} else {
		log.Printf("WAR EP %s: Name does not match RegEx", slug)
		EP.Episode.Int64 = 0
	}
	EP.Name.String = removeRegex.ReplaceAllString(EP.Name.String, "")

	// The description is the text of all article paragraphs, one per line.
	doc.Find(".article > p").Each(func(_ int, p *goquery.Selection) {
		EP.Descr.String += p.Text() + "\n"
	})
	EP.Descr.String = strings.Trim(EP.Descr.String, "\n ")
	EP.AuthorID = LP.AuthorID

	// The video ID is either the last path segment of the iframe URL or, for
	// ".../watch?v=ID" URLs, the "v" query parameter.
	yt, _ := doc.Find(".youtube > iframe").Attr("src")
	ytURL, err := url.Parse(yt)
	if err != nil {
		log.Printf("ERR EP %s: Invalid YouTube URL (%+v)", slug, err)
		return
	}
	EP.Youtube.String = path.Base(ytURL.Path)
	if EP.Youtube.String == "watch" {
		EP.Youtube.String = ytURL.Query().Get("v")
		// Cut off anything following a stray second "?".
		if idx := strings.LastIndex(EP.Youtube.String, "?"); idx != -1 {
			EP.Youtube.String = EP.Youtube.String[:idx]
		}
	}

	ytres, err := youtube.GetVideos([]string{"snippet", "statistics", "status", "contentDetails"}, []string{EP.Youtube.String}, config.C.YoutubeKey)
	if err != nil || len(ytres.Items) == 0 || ytres.Items[0].Status.UploadStatus != "processed" {
		log.Printf("ERR EP %s: Video %s is private (%+v)", slug, EP.Youtube.String, err)
		return
	}
	video := ytres.Items[0]

	if EP.Descr.String == "" {
		EP.Descr.String = video.Snippet.Description
	}

	// Prefer the "maxres" thumbnail and fall back to "high". err is nil here,
	// so it stays nil when neither exists.
	if thumb, ok := video.Snippet.Thumbnails["maxres"]; ok {
		EP.ThumbS.String, EP.ThumbB.String, err = image.ResizeThumb(thumb.Url)
	} else if thumb, ok := video.Snippet.Thumbnails["high"]; ok {
		EP.ThumbS.String, EP.ThumbB.String, err = image.ResizeThumb(thumb.Url)
	}
	if err != nil {
		log.Printf("WAR EP %s: Error resizing thumbnail: %+v", slug, err)
	}

	EP.Aired, err = time.Parse(time.RFC3339Nano, video.Snippet.PublishedAt)
	if err != nil {
		log.Printf("ERR EP %s: Could not parse aired", slug)
		return
	}

	dur, err := duration.FromString(video.ContentDetails.Duration)
	if err == nil {
		EP.Duration.Int64 = int64(dur.ToDuration().Seconds())
	} else {
		log.Printf("WAR EP %s: Could not parse duration", slug)
		EP.Duration.Int64 = 0
	}

	EP.Rating.Float64, EP.Votes.Int64 = youtube.GetRatingAndVotesWithRes(ytres)

	// Keep the Let's Play's air date at that of its earliest known episode.
	if LP.Aired.IsZero() || LP.Aired.After(EP.Aired) {
		LP.Aired = EP.Aired
		if err := database.Db.Save(&LP).Error; err != nil {
			log.Printf("WAR EP %s: Could not update aired of Let's Play (%+v)", slug, err)
		}
	}

	if err := database.Db.Create(&EP).Error; err != nil {
		log.Printf("ERR EP %s: Could not be added to databse (%+v)", slug, err)
	}
}
// UpdateEpisodes loads every stored episode and refreshes each of them in its
// own UpdateEpisode goroutine. It blocks on the package-level wait group until
// all updates have finished. If the episodes cannot be loaded the error is
// logged and nothing is started.
func UpdateEpisodes() {
	var EPs []database.Episode
	if err := database.Db.Find(&EPs).Error; err != nil {
		log.Printf("ERR EP: Could not load episodes (%+v)", err)
		return
	}
	for _, EP := range EPs {
		// Add before starting the goroutine so Wait cannot return early.
		wg.Add(1)
		go UpdateEpisode(EP)
	}
	wg.Wait()
}
// UpdateEpisode refreshes one stored episode and saves it.
//
// Rating and vote count are re-read from YouTube for the episode's video ID.
// If the episode aired before its Let's Play's recorded air date, or none is
// recorded, the Let's Play is updated to the episode's air date. Thumbnails
// are generated when EP.ThumbB is not valid.
//
// EP is received by value, so the caller's copy is not modified. The caller
// must have registered the call with the package-level wait group; the
// registration is released on return. Failures are logged, not returned.
func UpdateEpisode(EP database.Episode) {
	defer wg.Done()

	// NOTE(review): the third return value is discarded — confirm it is not
	// an error/status that should stop rating and votes from being overwritten.
	var thumb youtube.Thumb
	EP.Rating.Float64, EP.Votes.Int64, _, thumb = youtube.GetRatingAndVotesWithId(EP.Youtube.String, config.C.YoutubeKey)

	var LP database.LetsPlay
	if err := database.Db.Where("ID = ?", EP.LetsPlayID).First(&LP).Error; err != nil {
		// A failed lookup leaves LP zero-valued; saving it would insert a
		// blank Let's Play row, so only the air-date sync is skipped.
		log.Printf("WAR EP %s: Let's Play %v lookup failed (%+v)", EP.Slug.String, EP.LetsPlayID, err)
	} else if LP.Aired.IsZero() || LP.Aired.After(EP.Aired) {
		LP.Aired = EP.Aired
		if err := database.Db.Save(&LP).Error; err != nil {
			log.Printf("WAR EP %s: Could not update aired of Let's Play (%+v)", EP.Slug.String, err)
		}
	}

	if !EP.ThumbB.Valid {
		// NOTE(review): Valid is not set after filling the thumbnails —
		// confirm the database package handles that, otherwise this branch
		// runs again on every update.
		var err error
		EP.ThumbS.String, EP.ThumbB.String, err = image.ResizeThumb(thumb.Url)
		if err != nil {
			log.Printf("WAR EP %s: Error resizing thumbnail: %+v", EP.Slug.String, err)
		}
	}

	if err := database.Db.Save(&EP).Error; err != nil {
		log.Printf("ERR EP %s: Could not be updated in databse (%+v)", EP.Slug.String, err)
	}
}