wi/main.go

153 lines
2.6 KiB
Go
Raw Normal View History

2016-07-21 19:40:19 +00:00
/**
* @file main.go
* @author Mikhail Klementyev jollheef<AT>riseup.net
* @license GNU GPLv3
* @date July, 2016
* @brief Tiny non-interactive cli browser
*/
2016-07-19 22:08:19 +00:00
package main
import (
2016-07-22 12:43:23 +00:00
"bytes"
"database/sql"
"strings"
2016-07-19 22:08:19 +00:00
"fmt"
"io/ioutil"
"log"
"net/http"
2016-07-22 12:44:12 +00:00
"github.com/jollheef/wi/storage"
2016-07-22 12:43:23 +00:00
2016-07-19 22:08:19 +00:00
"github.com/jaytaylor/html2text"
2016-07-22 12:43:23 +00:00
"golang.org/x/net/html"
2016-07-19 22:08:19 +00:00
"golang.org/x/net/html/charset"
kingpin "gopkg.in/alecthomas/kingpin.v2"
)
var (
2016-07-22 12:43:23 +00:00
arg_url = kingpin.Flag("url", "Url").String()
2016-07-24 07:29:11 +00:00
arg_link = kingpin.Flag("link", "Link").Int64()
2016-07-19 22:08:19 +00:00
)
2016-07-24 07:38:23 +00:00
func parseLink(db *sql.DB, oldPage, value string, req *http.Request) (htmlPage string, err error) {
url, err := req.URL.Parse(value)
2016-07-19 22:08:19 +00:00
if err != nil {
2016-07-24 07:38:23 +00:00
return
2016-07-19 22:08:19 +00:00
}
2016-07-24 07:38:23 +00:00
linkNo, err := storage.GetLinkID(db, url.String())
2016-07-19 22:08:19 +00:00
if err != nil {
2016-07-24 07:38:23 +00:00
linkNo, err = storage.AddLink(db, url.String())
if err != nil {
return
}
2016-07-19 22:08:19 +00:00
}
2016-07-24 07:38:23 +00:00
for _, s := range []string{value, html.EscapeString(value)} {
htmlPage = strings.Replace(oldPage, "\""+s+"\"",
"\""+fmt.Sprintf("%d", linkNo)+"\"", -1)
2016-07-19 22:08:19 +00:00
}
2016-07-24 07:38:23 +00:00
return
}
2016-07-19 22:08:19 +00:00
2016-07-24 07:38:23 +00:00
func parseLinks(db *sql.DB, body []byte, req *http.Request) (htmlPage string, err error) {
htmlPage = string(body)
2016-07-22 12:43:23 +00:00
z := html.NewTokenizer(bytes.NewReader(body))
for {
tt := z.Next()
if tt == html.ErrorToken {
break
}
for {
key, value, moreAttr := z.TagAttr()
if string(key) == "href" {
2016-07-24 07:38:23 +00:00
htmlPage, err = parseLink(db, htmlPage, string(value), req)
2016-07-22 12:43:23 +00:00
if err != nil {
2016-07-24 07:38:23 +00:00
return
2016-07-22 12:43:23 +00:00
}
}
if !moreAttr {
break
}
}
}
2016-07-24 07:38:23 +00:00
return
}
func cmd_url(db *sql.DB, url string) {
client := &http.Client{}
// TODO Full url encoding
req, err := http.NewRequest("GET", strings.Replace(url, " ", "%20", -1), nil)
if err != nil {
log.Fatalln(err)
}
req.Header.Set("User-Agent", "Wi 0.0")
resp, err := client.Do(req)
if err != nil {
log.Fatalln(err)
}
defer resp.Body.Close()
utf8, err := charset.NewReader(resp.Body, resp.Header.Get("Content-Type"))
if err != nil {
fmt.Println("Encoding error:", err)
return
}
body, err := ioutil.ReadAll(utf8)
if err != nil {
fmt.Println("IO error:", err)
return
}
htmlPage, err := parseLinks(db, body, req)
if err != nil {
panic(err)
}
2016-07-22 12:43:23 +00:00
text, err := html2text.FromString(htmlPage)
2016-07-19 22:08:19 +00:00
if err != nil {
panic(err)
}
2016-07-22 12:43:23 +00:00
text += ""
2016-07-19 22:08:19 +00:00
fmt.Println(text)
}
2016-07-22 12:43:23 +00:00
2016-07-24 07:29:11 +00:00
func cmd_link(db *sql.DB, linkID int64) {
2016-07-22 12:43:23 +00:00
url, err := storage.GetLink(db, linkID)
if err != nil {
panic(err)
}
cmd_url(db, url)
}
// main opens the link database, parses the command-line flags and dispatches
// to cmd_url (when --url is non-empty) or cmd_link (when --link is non-zero).
// --url takes precedence when both are given; with neither, nothing is done.
func main() {
	db, err := storage.OpenDB("/tmp/wi.db")
	if err != nil {
		panic(err)
	}
	// Release the database handle on normal return and on panic. Paths that
	// end in os.Exit (log.Fatalln, kingpin usage errors) skip deferred calls.
	defer db.Close()

	kingpin.Parse()

	switch {
	case *arg_url != "":
		cmd_url(db, *arg_url)
	case *arg_link != 0:
		cmd_link(db, *arg_link)
	}
}