commit 0d2d278260d0ebd1f447aa832dfee09351a95229 Author: RobViren Date: Mon Nov 1 18:38:58 2021 -0500 rebasing diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e38b63e --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +/browser +browser +data-* +log.tsv +datahop* +config.bak +data.xlsx +log.xlsx +log-* \ No newline at end of file diff --git a/buildmac.sh b/buildmac.sh new file mode 100755 index 0000000..55f889a --- /dev/null +++ b/buildmac.sh @@ -0,0 +1,3 @@ +#!/usr/bin/zsh + +GOOS=darwin GOARCH=arm64 go build -x \ No newline at end of file diff --git a/config.xlsx b/config.xlsx new file mode 100755 index 0000000..b8b9137 Binary files /dev/null and b/config.xlsx differ diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..c6d5caf --- /dev/null +++ b/go.mod @@ -0,0 +1,21 @@ +module rodbiren.ddns.net/git/robviren/datahop + +go 1.17 + +require ( + github.com/go-rod/rod v0.101.8 + github.com/xuri/excelize/v2 v2.4.1 +) + +require ( + github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect + github.com/richardlehane/mscfb v1.0.3 // indirect + github.com/richardlehane/msoleps v1.0.1 // indirect + github.com/xuri/efp v0.0.0-20210322160811-ab561f5b45e3 // indirect + github.com/ysmood/goob v0.3.0 // indirect + github.com/ysmood/gson v0.6.4 // indirect + github.com/ysmood/leakless v0.7.0 // indirect + golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97 // indirect + golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985 // indirect + golang.org/x/text v0.3.6 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..ab560b5 --- /dev/null +++ b/go.sum @@ -0,0 +1,41 @@ +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-rod/rod v0.101.8 h1:oV0O97uwjkCVyAP0hD6K6bBE8FUMIjs0dtF7l6kEBsU= +github.com/go-rod/rod v0.101.8/go.mod h1:N/zlT53CfSpq74nb6rOR0K8UF0SPUPBmzBnArrms+mY= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= +github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/richardlehane/mscfb v1.0.3 h1:rD8TBkYWkObWO0oLDFCbwMeZ4KoalxQy+QgniCj3nKI= +github.com/richardlehane/mscfb v1.0.3/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk= +github.com/richardlehane/msoleps v1.0.1 h1:RfrALnSNXzmXLbGct/P2b4xkFz4e8Gmj/0Vj9M9xC1o= +github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/xuri/efp v0.0.0-20210322160811-ab561f5b45e3 h1:EpI0bqf/eX9SdZDwlMmahKM+CDBgNbsXMhsN28XrM8o= +github.com/xuri/efp v0.0.0-20210322160811-ab561f5b45e3/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI= +github.com/xuri/excelize/v2 v2.4.1 h1:veeeFLAJwsNEBPBlDepzPIYS1eLyBVcXNZUW79exZ1E= +github.com/xuri/excelize/v2 v2.4.1/go.mod h1:rSu0C3papjzxQA3sdK8cU544TebhrPUoTOaGPIh0Q1A= +github.com/ysmood/goob v0.3.0 h1:XZ51cZJ4W3WCoCiUktixzMIQF86W7G5VFL4QQ/Q2uS0= +github.com/ysmood/goob v0.3.0/go.mod h1:S3lq113Y91y1UBf1wj1pFOxeahvfKkCk6mTWTWbDdWs= +github.com/ysmood/got v0.15.1/go.mod h1:pE1l4LOwOBhQg6A/8IAatkGp7uZjnalzrZolnlhhMgY= +github.com/ysmood/gotrace v0.2.2/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM= +github.com/ysmood/gson v0.6.4 h1:Yb6tosv6bk59HqjZu2/7o4BFherpYEMkDkXmlhgryZ4= +github.com/ysmood/gson v0.6.4/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg= +github.com/ysmood/leakless v0.7.0 h1:XCGdaPExyoreoQd+H5qgxM3ReNbSPFsEXpSKwbXbwQw= +github.com/ysmood/leakless v0.7.0/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ= +golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97 h1:/UOmuWzQfxxo9UtlXMwuQU8CMgg1eZXqTRwkSQJWKOI= +golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/image v0.0.0-20210220032944-ac19c3e999fb/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985 h1:4CSI6oo7cOjJKajidEljs9h+uP0rRZBPPPhcCbj5mw8= +golang.org/x/net v0.0.0-20210726213435-c6fcb2dbf985/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go new file mode 100644 index 0000000..9d6d313 --- /dev/null +++ b/main.go @@ -0,0 +1,80 @@ +package main + +import ( + "log" + "os" + "path" + "path/filepath" + "strconv" + "time" + + "github.com/xuri/excelize/v2" + "rodbiren.ddns.net/git/robviren/datahop/runner" +) + +func main() { + + exe_path, err := os.Executable() + if err != nil { + log.Fatalln(err) + } + work_dir := filepath.Dir(exe_path) + + datahop := runner.NewRunner() + data := datahop.RunAll() + defer datahop.Close() + + var f *excelize.File + _, err = os.Stat(path.Join(work_dir, "data.xlsx")) + if err == nil { + f, err = excelize.OpenFile(path.Join(work_dir, "data.xlsx")) + if err != nil { + log.Println(err) + } + } else { + f = excelize.NewFile() + f.NewSheet("Facebook") + f.NewSheet("Instagram") + } + + f.DeleteSheet("Sheet1") + + f.SetCellValue("Facebook", "A1", "Brand") + f.SetCellValue("Facebook", "B1", "Followers") + f.SetCellValue("Facebook", "C1", "Engagement") + f.SetCellValue("Facebook", "D1", "Posts") + f.SetCellValue("Facebook", "E1", "Posts") + + f.SetCellValue("Instagram", "A1", "Brand") + f.SetCellValue("Instagram", "B1", "Followers") + f.SetCellValue("Instagram", "C1", "Engagement") + f.SetCellValue("Instagram", "D1", "Posts") + f.SetCellValue("Instagram", "E1", "Posts") + + for _, elm := range data { + rows, err := f.GetRows(elm.Site) + if err != nil { + log.Println(err) + } + f.SetCellValue(elm.Site, "A"+strconv.Itoa(1+len(rows)), elm.Name) + f.SetCellValue(elm.Site, "B"+strconv.Itoa(1+len(rows)), elm.Followers) + f.SetCellValue(elm.Site, "C"+strconv.Itoa(1+len(rows)), elm.Engagement) + f.SetCellValue(elm.Site, "D"+strconv.Itoa(1+len(rows)), elm.Posts) + f.SetCellValue(elm.Site, "E"+strconv.Itoa(1+len(rows)), time.Now().Format("01-02-2006")) + } + + current_day := time.Now() + + if err := f.SaveAs(path.Join(work_dir, "data-"+current_day.Format("01-02-2006")+".xlsx")); err != nil { + log.Println(err) + } + + if err := f.SaveAs(path.Join(work_dir, "data.xlsx")); err != nil { + log.Println(err) + } + + // signal_channel := make(chan os.Signal, 1) + // signal.Notify(signal_channel, os.Interrupt) + // <-signal_channel + +} diff --git a/runner/check.go b/runner/check.go new file mode 100644 index 0000000..0449ef6 --- /dev/null +++ b/runner/check.go @@ -0,0 +1,9 @@ +package runner + +import "log" + +func checkErr(err error) { + if err != nil { + log.Fatal(err) + } +} diff --git a/runner/facebook.go b/runner/facebook.go new file mode 100644 index 0000000..b3832f2 --- /dev/null +++ b/runner/facebook.go @@ -0,0 +1,102 @@ +package runner + +import ( + _ "embed" + "encoding/json" + "fmt" + "time" +) + +//go:embed js/facebook_data.js +var facebook_data string + +const ( + facebook_login_url = "https://m.facebook.com/login/?ref=dbl&fl" +) + +type FacebookRes struct { + Followers int `json:"followers"` + Posts []FacebookPost `json:"posts"` +} + +type FacebookPost struct { + Engagement int `json:"engagement"` + Body string `json:"body"` + Timestamp time.Time `json:"timestamp"` + URL string `json:"url"` +} + +func (r *Runner) CheckFacebook() { + r.page.MustNavigate(facebook_login_url) + r.page.WaitLoad() + time.Sleep(time.Millisecond * 113) + location := r.page.MustEval("window.location.href").Str() + if location == facebook_login_url { + r.page.MustElement(`input[name="email"]`).Input(r.FacebookEmail) + time.Sleep(time.Millisecond * 113) + r.page.MustElement(`input[name="pass"]`).Input(r.FacebookPass) + time.Sleep(time.Millisecond * 72) + r.page.MustElement(`button[name="login"]`).MustClick() + time.Sleep(time.Millisecond * 5000) + } +} + +func (r *Runner) GetFacebookData() []Result { + var results []Result + for i := range r.Targets { + fmt.Println("Processing " + r.Targets[i].Name + "'s Facebook") + results = append(results, r.GetFacebookDataByIndex(i)) + } + return results +} + +//Navigate with random stops +func (r *Runner) FacebookPageNavigate(index int) { + r.page.MustNavigate(r.Targets[index].Facebook) + r.page.MustWaitLoad() + r.Scroll(30, longTimeOut) +} + +func (r *Runner) FacebookPagePullData() FacebookRes { + data := r.page.MustEval(facebook_data).JSON("", "") + var res FacebookRes + err := json.Unmarshal([]byte(data), &res) + checkErr(err) + return res +} + +func (r *Runner) calcFacebookPostData(posts []FacebookPost) (int, int) { + total_engagement := 0 + total_posts := 0 + for _, elm := range posts { + if elm.Timestamp.After(r.weekago) && elm.Timestamp.Before(r.currentTime) { + total_engagement += elm.Engagement + total_posts += 1 + } + } + + return total_engagement, total_posts +} + +func (r *Runner) GetFacebookDataByIndex(index int) Result { + + r.FacebookPageNavigate(index) + + data := r.FacebookPagePullData() + + total_engagement, total_posts := r.calcFacebookPostData(data.Posts) + + r.logger.LogFacebookRes(data, r.Targets[index].Name) + + result := Result{ + r.Targets[index].Name, + "Facebook", + data.Followers, + total_engagement, + total_posts, + } + + r.logger.LogResult(result, "Facebook") + + return result +} diff --git a/runner/instagram.go b/runner/instagram.go new file mode 100644 index 0000000..c733dc0 --- /dev/null +++ b/runner/instagram.go @@ -0,0 +1,149 @@ +package runner + +import ( + _ "embed" + "encoding/json" + "fmt" + "time" +) + +type InstaRes struct { + Followers int `json:"followers"` + Urls []string `json:"urls"` + Engagement int `json:"engagement"` + Posts []InstaPost `json:"posts"` +} + +type InstaPost struct { + Engagement int `json:"engagement"` + Body string `json:"body"` + Timestamp time.Time `json:"timestamp"` + Comments int `json:"comments"` + Likes int `json:"likes"` + Views int `json:"views"` + URL string `json:"url"` +} + +//go:embed js/instagram_followers_urls.js +var instagram_followers_urls string + +//go:embed js/instagram_post_data.js +var instagram_post_data string + +//go:embed js/instagram_liked_by_url.js +var instagram_liked_by_url string + +//go:embed js/instagram_liked_by.js +var instagram_liked_by string + +const insta_login_url = "https://www.instagram.com/accounts/login/" + +func (r *Runner) CheckInstaGram() { + r.page.MustNavigate(insta_login_url) + r.page.WaitLoad() + location := r.page.MustEval("window.location.href").Str() + if location == insta_login_url { + r.page.MustElement(`input[name="username"]`).Input(r.InstaUser) + time.Sleep(time.Millisecond * 113) + r.page.MustElement(`input[name="password"]`).Input(r.InstaPass) + time.Sleep(time.Millisecond * 72) + r.page.MustElementsByJS(`document.querySelectorAll("button")`).Last().MustClick() + time.Sleep(time.Millisecond * 5000) + } +} + +func (r *Runner) InstaPageNavigate(index int) { + r.page.MustNavigate(r.Targets[index].Instagram) + r.page.MustWaitLoad() + r.Scroll(3, 250) +} + +func (r *Runner) InstaGetRes() InstaRes { + r.page.MustEval(instagram_followers_urls) + data := r.page.MustEval("data").JSON("", "") + var res InstaRes + err := json.Unmarshal([]byte(data), &res) + checkErr(err) + return res +} + +func (r *Runner) calcInstaPostData(posts []InstaPost) (int, int) { + total_engagement := 0 + total_posts := 0 + for _, elm := range posts { + if elm.Timestamp.After(r.weekago) { + total_engagement += elm.Engagement + total_posts += 1 + } + } + + return total_engagement, total_posts +} +func (r *Runner) GetInstaDataByIndex(index int) Result { + + r.InstaPageNavigate(index) + + res := r.InstaGetRes() + posts := r.GetIntsaEgagement(res.Urls) + total_engagement, total_posts := r.calcInstaPostData(posts) + + r.logger.LogInstaRes(posts, r.Targets[index].Name) + + result := Result{r.Targets[index].Name, "Instagram", res.Followers, total_engagement, total_posts} + r.logger.LogResult(result, "Insta") + return result +} + +func (r *Runner) GetInstaData() []Result { + var results []Result + for i := range r.Targets { + fmt.Println("Processing " + r.Targets[i].Name + "'s Instagram") + results = append(results, r.GetInstaDataByIndex(i)) + } + return results +} + +func (r *Runner) GetIntsaEgagement(urls []string) []InstaPost { + var posts []InstaPost + + for _, url := range urls { + fmt.Print("https://www.instagram.com" + url + " ---> ") + r.page.MustNavigate("https://www.instagram.com" + url) + r.page.MustWaitLoad() + r.Scroll(10, 250) + + data := r.page.MustEval(instagram_post_data).JSON("", "") + var res InstaPost + err := json.Unmarshal([]byte(data), &res) + checkErr(err) + if res.Timestamp.Before(r.weekago) { + fmt.Println("Reached post outside range ", res.Timestamp) + return posts + } + res.URL = "https://www.instagram.com" + url + if res.Timestamp.Before(r.currentTime) { + //Hard get the likes + if res.Likes == 0 { + r.page.MustElementByJS(instagram_liked_by_url).MustClick() + r.page.MustWaitLoad() + r.Scroll(99999, longTimeOut) + likes := r.page.MustEval(instagram_liked_by).Int() + + res.Likes = likes + } + + if res.Views != 0 { + res.Engagement = res.Views + res.Comments + } else { + res.Engagement = res.Likes + res.Comments + } + + fmt.Println("Likes :", res.Likes, ", Comments: ", res.Comments) + + posts = append(posts, res) + } else { + fmt.Println("Posted Today ", res.Timestamp) + } + } + return posts +} diff --git a/runner/js/facebook_data.js b/runner/js/facebook_data.js new file mode 100644 index 0000000..fe3623a --- /dev/null +++ b/runner/js/facebook_data.js @@ -0,0 +1,106 @@ +() => { + + function getTimeStamp(time_string) { + let d = new Date(); + if (time_string.includes("Yesterday")) { + return (new Date(d.setDate(d.getDate() - 1))) + } else if (time_string.includes("AM") || time_string.includes("PM")) { + var date_strings = time_string.split(" ") + if (date_strings.length === 5) { + var month_string = date_strings[0] + var month = 0 + switch (month_string) { + case "January": + month = 1 + break + case "February": + month = 2 + break + case "March": + month = 3 + break + case "April": + month = 4 + break + case "May": + month = 5 + break + case "June": + month = 6 + break + case "July": + month = 7 + break + case "August": + month = 8 + break + case "September": + month = 9 + break + case "October": + month = 10 + break + case "November": + month = 11 + break + case "December": + month = 12 + break + } + var day = parseInt(date_strings[1]) + var year = d.getFullYear() + if (month === 12 && d.getMonth() !== month) { + year += 1 + } + var hours = parseInt(date_strings[3].split(":")[0]) + if (hours < 12 && date_strings[4] === "PM") { + hours += 12 + } + var minutes = parseInt(date_strings[3].split(":")[1]) + return new Date(year, month - 1, day, hours, minutes) + } + } else { + return (new Date()) + } + } + + function getURL(elm) { + res = elm.getElementsByTagName("a") + for (let i = 0; i < res.length; i++) { + if (res[i].href.includes("story.php?") && res[i].getAttribute("aria-label") == "Open story") { + return res[i].href + } + } + } + + function getEngagement(elm) { + res = elm.getElementsByTagName("footer")[0] + engage_reg = /\d+/g + total = 0 + try { + res.innerText.match(engage_reg).forEach(match => { + total += parseInt(match) + }) + } catch (e) { return 0 } + + return total + } + + //Followers + followers = parseInt(document.getElementsByName("description")[0].getAttribute("content").replace(/,/g, '').match(/\d+/g)[0]) + + arts = document.getElementsByTagName("article") + posts = [] + for (var i = 0; i < arts.length; i++) { + var timestamp = getTimeStamp(arts[i].getElementsByTagName("abbr")[0].innerText) + //Not accurate + var body = arts[i].innerText + var url = getURL(arts[i]) + var engagement = getEngagement(arts[i]) + if(url){ + posts.push({ timestamp: timestamp.toISOString(), body: body, engagement: engagement, url: url }) + } + } + + return { followers: followers, posts: posts } +} \ No newline at end of file diff --git a/runner/js/instagram_followers_urls.js b/runner/js/instagram_followers_urls.js new file mode 100644 index 0000000..d7231c9 --- /dev/null +++ b/runner/js/instagram_followers_urls.js @@ -0,0 +1,24 @@ +() => { + + follower_data = document.getElementsByTagName("span") + followers = 0 + for (i = 0; i < follower_data.length; i++) { + if (!isNaN(parseInt(follower_data[i].title.replace(/,/g, "")))) { + followers = parseInt(follower_data[i].title.replace(/,/g, "")) + } + } + + //Urls + res = document.getElementsByTagName("a") + insta_urls = [] + for (let i = 0; i < res.length; i++) { + if (res[i].getAttribute("href").length == 15 && res[i].getAttribute("href").search("/p/") == 0) { + insta_urls.push(res[i].getAttribute("href")) + } + } + + window["data"] = { + urls: insta_urls, + followers: followers + } +} \ No newline at end of file diff --git a/runner/js/instagram_liked_by.js b/runner/js/instagram_liked_by.js new file mode 100644 index 0000000..87f52b7 --- /dev/null +++ b/runner/js/instagram_liked_by.js @@ -0,0 +1,3 @@ +() => { + return document.getElementsByTagName("main")[0].innerText.split("Follow\n").length +} \ No newline at end of file diff --git a/runner/js/instagram_liked_by_url.js b/runner/js/instagram_liked_by_url.js new file mode 100644 index 0000000..a0d1c6d --- /dev/null +++ b/runner/js/instagram_liked_by_url.js @@ -0,0 +1,10 @@ +() => { + res = document.getElementsByTagName("a") + for (i = 0; i < res.length; i++) { + if (res[i].href) { + if (res[i].href.includes("liked_by")) { + return res[i] + } + } + } +} \ No newline at end of file diff --git a/runner/js/instagram_post_data.js b/runner/js/instagram_post_data.js new file mode 100644 index 0000000..25bd933 --- /dev/null +++ b/runner/js/instagram_post_data.js @@ -0,0 +1,37 @@ +() => { + //TimeStamp + res = document.getElementsByTagName("time") + timestamp = res[0].getAttribute("datetime") + + //Engage + res = document.getElementsByTagName("article") + like_reg = /\d+ likes/ + comment_reg = /\d+ comments/ + view_reg = /\d+ views/ + comma_reg = /,/g + engagement = 0 + comments = 0 + likes = 0 + views = 0 + try { + engagement += parseInt(res[0].innerText.replace(comma_reg, "").match(like_reg)[0]) + likes += parseInt(res[0].innerText.replace(comma_reg, "").match(like_reg)[0]) + } catch (e) { } + try { + engagement += parseInt(res[0].innerText.replace(comma_reg, "").match(comment_reg)[0]) + comments += parseInt(res[0].innerText.replace(comma_reg, "").match(comment_reg)[0]) + } catch (e) { } + try { + engagement += parseInt(res[0].innerText.replace(comma_reg, "").match(view_reg)[0]) + views += parseInt(res[0].innerText.replace(comma_reg, "").match(view_reg)[0]) + } catch (e) { } + + //Text Content + start = res[0].innerText.search(like_reg) + finish = res[0].innerText.search(comment_reg) + body = res[0].innerText.substr(start, (finish - start)).split("\n")[1] + + return { + timestamp: timestamp, body: body, engagement: engagement, likes: likes, comments: comments, views: views + } +} \ No newline at end of file diff --git a/runner/js/scroll.js b/runner/js/scroll.js new file mode 100644 index 0000000..98e8d64 --- /dev/null +++ b/runner/js/scroll.js @@ -0,0 +1,26 @@ +async (limit,timeout) => { + const getScrollHeight = (element) => { + if (!element) return 0 + + const { scrollHeight, offsetHeight, clientHeight } = element + return Math.max(scrollHeight, offsetHeight, clientHeight) + } + + const position = await new Promise((resolve) => { + let count = 0 + let limit_counter = 0 + const intervalId = setInterval(() => { + const { body } = document + const availableScrollHeight = getScrollHeight(body) + + window.scrollBy(0, 250) + count += 250 + limit_counter += 1 + if (count >= availableScrollHeight || limit_counter > limit) { + clearInterval(intervalId) + resolve(count) + } + }, timeout) + }) + return position +} \ No newline at end of file diff --git a/runner/logger.go b/runner/logger.go new file mode 100644 index 0000000..bffdd9f --- /dev/null +++ b/runner/logger.go @@ -0,0 +1,81 @@ +package runner + +import ( + "path" + "strconv" + "time" + + "github.com/xuri/excelize/v2" +) + +type Logger struct { + log *excelize.File + work_dir string + weekago time.Time + current_time time.Time +} + +func NewLogger(work_dir string, weekago time.Time, current_time time.Time) *Logger { + + logfile := excelize.NewFile() + logfile.NewSheet("FacebookPosts") + logfile.NewSheet("InstaPosts") + logfile.NewSheet("Facebook") + logfile.NewSheet("Insta") + logfile.DeleteSheet("Sheet1") + var logger = new(Logger) + logger.log = logfile + logger.work_dir = work_dir + logger.weekago = weekago + logger.current_time = current_time + return logger +} + +const log_facebook_sheet = "FacebookPosts" +const log_insta_sheet = "InstaPosts" + +func (l *Logger) LogFacebookRes(data FacebookRes, target string) { + rows, err := l.log.GetRows(log_facebook_sheet) + checkErr(err) + current_index := len(rows) + for _, elm := range data.Posts { + if elm.Timestamp.After(l.weekago) && elm.Timestamp.Before(l.current_time) { + l.log.SetCellValue(log_facebook_sheet, "A"+strconv.Itoa(current_index+1), target) + l.log.SetCellValue(log_facebook_sheet, "D"+strconv.Itoa(current_index+1), elm.Timestamp.Format("01-02-2006")) + l.log.SetCellValue(log_facebook_sheet, "C"+strconv.Itoa(current_index+1), elm.Engagement) + l.log.SetCellValue(log_facebook_sheet, "B"+strconv.Itoa(current_index+1), elm.URL) + current_index += 1 + } + } +} + +func (l *Logger) LogInstaRes(posts []InstaPost, target string) { + rows, err := l.log.GetRows(log_insta_sheet) + checkErr(err) + current_index := len(rows) + for _, elm := range posts { + if elm.Timestamp.After(l.weekago) && elm.Timestamp.Before(l.current_time) { + l.log.SetCellValue(log_insta_sheet, "A"+strconv.Itoa(current_index+1), target) + l.log.SetCellValue(log_insta_sheet, "B"+strconv.Itoa(current_index+1), elm.URL) + l.log.SetCellValue(log_insta_sheet, "C"+strconv.Itoa(current_index+1), elm.Engagement) + l.log.SetCellValue(log_insta_sheet, "D"+strconv.Itoa(current_index+1), elm.Timestamp.Format("01-02-2006")) + current_index += 1 + } + } +} + +func (l *Logger) LogResult(data Result, sheet string) { + rows, err := l.log.GetRows(sheet) + checkErr(err) + current_index := len(rows) + l.log.SetCellValue(sheet, "A"+strconv.Itoa(current_index+1), data.Name) + l.log.SetCellValue(sheet, "B"+strconv.Itoa(current_index+1), data.Followers) + l.log.SetCellValue(sheet, "C"+strconv.Itoa(current_index+1), data.Engagement) + l.log.SetCellValue(sheet, "D"+strconv.Itoa(current_index+1), data.Posts) + l.log.SetCellValue(sheet, "E"+strconv.Itoa(current_index+1), time.Now().Format("01-02-2006")) +} + +func (l *Logger) Close() { + err := l.log.SaveAs(path.Join(l.work_dir, "log-"+time.Now().Format("01-02-2006")+".xlsx")) + checkErr(err) +} diff --git a/runner/result.go b/runner/result.go new file mode 100644 index 0000000..d5f0ce3 --- /dev/null +++ b/runner/result.go @@ -0,0 +1,9 @@ +package runner + +type Result struct { + Name string + Site string + Followers int + Engagement int + Posts int +} diff --git a/runner/runner.go b/runner/runner.go new file mode 100644 index 0000000..ace271b --- /dev/null +++ b/runner/runner.go @@ -0,0 +1,152 @@ +package runner + +import ( + "os" + "os/signal" + "path" + "path/filepath" + "time" + + "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/devices" + "github.com/go-rod/rod/lib/launcher" + "github.com/xuri/excelize/v2" +) + +const ( + longTimeOut int = 2000 +) + +type Runner struct { + page *rod.Page + logger *Logger + Targets []Target + FacebookEmail string + FacebookPass string + InstaUser string + InstaPass string + work_dir string + weekago time.Time + currentTime time.Time +} + +type Target struct { + Name string `json:"name"` + Facebook string `json:"facebook"` + Instagram string `json:"instagram"` +} + +func getExecutablePath() string { + exe_path, err := os.Executable() + checkErr(err) + work_dir := filepath.Dir(exe_path) + return work_dir +} + +func parseConfig(r *Runner) { + f, err := excelize.OpenFile(path.Join(r.work_dir, "config.xlsx")) + checkErr(err) + + //Facebook + cell, err := f.GetCellValue("Sheet1", "E1") + checkErr(err) + r.FacebookEmail = cell + cell, err = f.GetCellValue("Sheet1", "E2") + checkErr(err) + r.FacebookPass = cell + + //Insta + cell, err = f.GetCellValue("Sheet1", "E3") + checkErr(err) + r.InstaUser = cell + cell, err = f.GetCellValue("Sheet1", "E4") + checkErr(err) + r.InstaPass = cell + + rows, err := f.GetRows("Sheet1") + checkErr(err) + var targets []Target + for i, row := range rows { + var target Target + if i != 0 { + for j, colCell := range row { + switch j { + case 0: + target.Name = colCell + case 1: + target.Facebook = colCell + case 2: + target.Instagram = colCell + } + } + } + if len(target.Name) > 0 { + targets = append(targets, target) + } + } + r.Targets = targets +} + +func configRod(r *Runner) { + u := launcher.New(). + Set("user-data-dir", path.Join(r.work_dir, "browser")). + Headless(false). + MustLaunch() + + r.page = rod.New().ControlURL(u).MustConnect().MustPage() + r.page.MustEmulate(devices.IPhone6or7or8Plus) +} + +func setTime(r *Runner) { + //Establish the date + weekago := time.Now().AddDate(0, 0, -7) + hours := weekago.Hour() + weekago = weekago.Add(time.Duration(-hours) * time.Hour) + r.weekago = weekago + + currentTime := time.Now() + hours = currentTime.Hour() + r.currentTime = currentTime.Add(time.Duration(-hours) * time.Hour) +} + +func NewRunner() *Runner { + work_dir := getExecutablePath() + var runner = new(Runner) + runner.work_dir = work_dir + parseConfig(runner) + configRod(runner) + setTime(runner) + runner.logger = NewLogger(work_dir, runner.weekago, runner.currentTime) + + return runner +} + +func (r *Runner) RunAll() []Result { + r.CheckInstaGram() + idata := r.GetInstaData() + r.CheckFacebook() + fdata := r.GetFacebookData() + return append(idata, fdata...) +} + +func (r *Runner) RunInsta() []Result { + r.CheckInstaGram() + idata := r.GetInstaData() + return idata +} + +func (r *Runner) RunFacebook() []Result { + r.CheckFacebook() + fdata := r.GetFacebookData() + return fdata +} + +func (r *Runner) Close() { + r.logger.Close() +} + +func (r *Runner) JustOpen() { + signal_channel := make(chan os.Signal, 1) + signal.Notify(signal_channel, os.Interrupt) + <-signal_channel +} diff --git a/runner/scroll.go b/runner/scroll.go new file mode 100644 index 0000000..86b2af7 --- /dev/null +++ b/runner/scroll.go @@ -0,0 +1,13 @@ +package runner + +import ( + _ "embed" +) + +//go:embed js/scroll.js +var scroll string + +func (r *Runner) Scroll(limit int, timeout int) { + r.page.MustEval(scroll, limit, timeout) + r.page.MustWaitLoad() +}