mirror of
https://github.com/tinode/chat.git
synced 2025-03-14 10:05:07 +00:00
Merge pull request #929 from yinebebt/linkpreview
feat: add link-preview handler
This commit is contained in:
2
go.mod
2
go.mod
@ -21,6 +21,7 @@ require (
|
||||
github.com/tinode/snowflake v1.0.0
|
||||
go.mongodb.org/mongo-driver v1.12.1
|
||||
golang.org/x/crypto v0.21.0
|
||||
golang.org/x/net v0.23.0
|
||||
golang.org/x/oauth2 v0.16.0
|
||||
golang.org/x/text v0.14.0
|
||||
google.golang.org/api v0.148.0
|
||||
@ -68,7 +69,6 @@ require (
|
||||
github.com/xdg-go/stringprep v1.0.4 // indirect
|
||||
github.com/youmark/pkcs8 v0.0.0-20201027041543-1326539a0a0a // indirect
|
||||
go.opencensus.io v0.24.0 // indirect
|
||||
golang.org/x/net v0.23.0 // indirect
|
||||
golang.org/x/sync v0.4.0 // indirect
|
||||
golang.org/x/sys v0.18.0 // indirect
|
||||
golang.org/x/time v0.3.0 // indirect
|
||||
|
204
server/linkpreview.go
Normal file
204
server/linkpreview.go
Normal file
@ -0,0 +1,204 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// linkPreview is the JSON document produced by the link preview endpoint.
// Every field is optional and is left out of the encoded output when empty.
type linkPreview struct {
	// Title is the og:title meta value when the page has one, otherwise the
	// text of the <title> element.
	Title string `json:"title,omitempty"`

	// Description is the og:description meta value when the page has one,
	// otherwise the content of <meta name="description">.
	Description string `json:"description,omitempty"`

	// ImageURL is the og:image meta value.
	ImageURL string `json:"image_url,omitempty"`
}
|
||||
|
||||
var client = &http.Client{
|
||||
Timeout: time.Second * 2,
|
||||
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||
if err := validateURL(req.URL); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
// previewLink handles the HTTP request, fetches the URL, and returns the link preview.
|
||||
func previewLink(w http.ResponseWriter, r *http.Request) {
|
||||
if r.Method != http.MethodGet && r.Method != http.MethodHead {
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
// check authorization
|
||||
uid, challenge, err := authHttpRequest(r)
|
||||
if err != nil {
|
||||
http.Error(w, "invalid auth secret", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if challenge != nil || uid.IsZero() {
|
||||
http.Error(w, "user not authenticated", http.StatusUnauthorized)
|
||||
return
|
||||
}
|
||||
|
||||
u := r.URL.Query().Get("url")
|
||||
if u == "" {
|
||||
http.Error(w, "Missing 'url' query parameter", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
pu, err := url.Parse(u)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
if err := validateURL(pu); err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
req, err := http.NewRequest(http.MethodGet, u, nil)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices { // StatusCode != 20X
|
||||
http.Error(w, "Non-OK HTTP status", http.StatusBadGateway)
|
||||
return
|
||||
}
|
||||
|
||||
body := http.MaxBytesReader(nil, resp.Body, 2*1024) // 2KB limit
|
||||
if cc := resp.Header.Get("Cache-Control"); cc != "" {
|
||||
w.Header().Set("Cache-Control", cc)
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
if r.Method == http.MethodHead {
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return
|
||||
}
|
||||
if strings.HasPrefix(resp.Header.Get("Content-Type"), "text/html") {
|
||||
if err := json.NewEncoder(w).Encode(extractMetadata(body)); err != nil {
|
||||
http.Error(w, "Failed to encode response", http.StatusInternalServerError)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func extractMetadata(body io.Reader) *linkPreview {
|
||||
var preview linkPreview
|
||||
var inTitleTag bool
|
||||
|
||||
tokenizer := html.NewTokenizer(body)
|
||||
lp:
|
||||
for {
|
||||
switch tokenizer.Next() {
|
||||
case html.ErrorToken:
|
||||
break lp
|
||||
|
||||
case html.StartTagToken, html.SelfClosingTagToken:
|
||||
tag, hasAttr := tokenizer.TagName()
|
||||
tagName := atom.Lookup(tag)
|
||||
if tagName == atom.Meta && hasAttr {
|
||||
var name, property, content string
|
||||
for {
|
||||
key, val, moreAttr := tokenizer.TagAttr()
|
||||
switch atom.String(key) {
|
||||
case "name":
|
||||
name = string(val)
|
||||
case "property":
|
||||
property = string(val)
|
||||
case "content":
|
||||
content = string(val)
|
||||
}
|
||||
if !moreAttr {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if content != "" {
|
||||
if strings.HasPrefix(property, "og:") {
|
||||
switch property {
|
||||
case "og:title":
|
||||
preview.Title = content
|
||||
case "og:description":
|
||||
preview.Description = content
|
||||
case "og:image":
|
||||
preview.ImageURL = content
|
||||
}
|
||||
} else if name == "description" && preview.Description == "" {
|
||||
preview.Description = content
|
||||
}
|
||||
}
|
||||
} else if tagName == atom.Title {
|
||||
inTitleTag = true
|
||||
}
|
||||
|
||||
case html.TextToken:
|
||||
if inTitleTag {
|
||||
if preview.Title == "" {
|
||||
preview.Title = tokenizer.Token().Data
|
||||
}
|
||||
inTitleTag = false
|
||||
}
|
||||
|
||||
case html.EndTagToken:
|
||||
inTitleTag = false
|
||||
}
|
||||
if preview.Title != "" && preview.Description != "" && preview.ImageURL != "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return sanitizePreview(preview)
|
||||
}
|
||||
|
||||
// validateURL reports whether u is acceptable as a link preview target.
//
// The scheme must be http or https, the host must be non-empty and must
// resolve, and none of the addresses it resolves to may be loopback, private,
// link-local, unspecified, multicast or broadcast. Rejecting these keeps the
// server from being used to reach hosts on its own network (for example the
// 169.254.169.254 metadata endpoint offered by cloud providers).
//
// The returned error is a *url.Error with Op set to "validate".
func validateURL(u *url.URL) error {
	if u.Scheme != "http" && u.Scheme != "https" {
		return &url.Error{Op: "validate", Err: errors.New("invalid scheme")}
	}

	host := u.Hostname()
	if host == "" {
		return &url.Error{Op: "validate", Err: errors.New("invalid host")}
	}

	ips, err := net.LookupIP(host)
	if err != nil || len(ips) == 0 {
		return &url.Error{Op: "validate", Err: errors.New("invalid host")}
	}
	for _, ip := range ips {
		// IsGlobalUnicast is false for loopback, link-local, unspecified,
		// multicast and IPv4 broadcast addresses, but true for private
		// ranges, which therefore need a separate check.
		if !ip.IsGlobalUnicast() || ip.IsPrivate() {
			return &url.Error{Op: "validate", Err: errors.New("non routable IP address")}
		}
	}

	return nil
}
|
||||
|
||||
func sanitizePreview(preview linkPreview) *linkPreview {
|
||||
if utf8.RuneCountInString(preview.Title) > 80 {
|
||||
preview.Title = string([]rune(preview.Title)[:80])
|
||||
}
|
||||
if utf8.RuneCountInString(preview.Description) > 256 {
|
||||
preview.Description = string([]rune(preview.Description)[:256])
|
||||
}
|
||||
if len(preview.ImageURL) > 2000 {
|
||||
preview.ImageURL = preview.ImageURL[:2000]
|
||||
}
|
||||
|
||||
return &linkPreview{
|
||||
Title: strings.TrimSpace(preview.Title),
|
||||
Description: strings.TrimSpace(preview.Description),
|
||||
ImageURL: strings.TrimSpace(preview.ImageURL),
|
||||
}
|
||||
}
|
@ -202,7 +202,8 @@ var globals struct {
|
||||
|
||||
// URL of the main endpoint.
|
||||
// TODO: implement file-serving API for gRPC and remove this feature.
|
||||
servingAt string
|
||||
servingAt string
|
||||
linkPreviewEnabled bool
|
||||
}
|
||||
|
||||
// Credential validator config.
|
||||
@ -292,16 +293,17 @@ type configType struct {
|
||||
DefaultCountryCode string `json:"default_country_code"`
|
||||
|
||||
// Configs for subsystems
|
||||
Cluster json.RawMessage `json:"cluster_config"`
|
||||
Plugin json.RawMessage `json:"plugins"`
|
||||
Store json.RawMessage `json:"store_config"`
|
||||
Push json.RawMessage `json:"push"`
|
||||
TLS json.RawMessage `json:"tls"`
|
||||
Auth map[string]json.RawMessage `json:"auth_config"`
|
||||
Validator map[string]*validatorConfig `json:"acc_validation"`
|
||||
AccountGC *accountGcConfig `json:"acc_gc_config"`
|
||||
Media *mediaConfig `json:"media"`
|
||||
WebRTC json.RawMessage `json:"webrtc"`
|
||||
Cluster json.RawMessage `json:"cluster_config"`
|
||||
Plugin json.RawMessage `json:"plugins"`
|
||||
Store json.RawMessage `json:"store_config"`
|
||||
Push json.RawMessage `json:"push"`
|
||||
TLS json.RawMessage `json:"tls"`
|
||||
Auth map[string]json.RawMessage `json:"auth_config"`
|
||||
Validator map[string]*validatorConfig `json:"acc_validation"`
|
||||
AccountGC *accountGcConfig `json:"acc_gc_config"`
|
||||
Media *mediaConfig `json:"media"`
|
||||
WebRTC json.RawMessage `json:"webrtc"`
|
||||
LinkPreviewEnabled bool `json:"link_preview_enabled"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
@ -734,6 +736,11 @@ func main() {
|
||||
mux.HandleFunc("/", serve404)
|
||||
}
|
||||
|
||||
globals.linkPreviewEnabled = config.LinkPreviewEnabled
|
||||
if config.LinkPreviewEnabled {
|
||||
mux.HandleFunc(config.ApiPath+"v0/preview-link", previewLink)
|
||||
}
|
||||
|
||||
if err = listenAndServe(config.Listen, mux, tlsConfig, signalHandler()); err != nil {
|
||||
logs.Err.Fatal(err)
|
||||
}
|
||||
|
@ -760,6 +760,7 @@ func (s *Session) hello(msg *ClientComMessage) {
|
||||
"maxTagCount": globals.maxTagCount,
|
||||
"maxFileUploadSize": globals.maxFileUploadSize,
|
||||
"reqCred": globals.validatorClientConfig,
|
||||
"linkPreviewEnabled": globals.linkPreviewEnabled,
|
||||
}
|
||||
if len(globals.iceServers) > 0 {
|
||||
params["iceServers"] = globals.iceServers
|
||||
|
@ -678,5 +678,6 @@
|
||||
// Address of the plugin.
|
||||
"service_addr": "tcp://localhost:40051"
|
||||
}
|
||||
]
|
||||
],
|
||||
"link_preview_enabled":false
|
||||
}
|
||||
|
Reference in New Issue
Block a user