rebased embedr (#3511)

* skeleton of embedr service, based on bskyweb

* embedr container setup

* builds on this branch

* actual routes

* fix embedr go:embed

* tweak embedr dockerfile

* progress on embedr

* fix path params

* tweaks to build process

* try to get embedr dockerfile to install embed deps

* build this branch

* updates to match sam's output HTML

* try to unbreak embedr dockerfile

* small embedr tweak

* docker hack

* get embed.js copied over to embedr

* don't x-frame-options for embed.bsky.app

* bskyembed: remove a console.log

* use html/template for golang snippet generation

* simplify embedr API fetches

* missing file

* Rm console.log fully

---------

Co-authored-by: Dan Abramov <dan.abramov@gmail.com>
This commit is contained in:
bnewbold 2024-04-13 12:20:06 -07:00 committed by GitHub
parent 196dd3a8ab
commit 58842d03a9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
30 changed files with 912 additions and 42 deletions

1
bskyweb/cmd/embedr/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/bskyweb

View file

@ -0,0 +1,207 @@
package main
import (
"context"
"errors"
"fmt"
"net/http"
"net/url"
"strconv"
"strings"
appbsky "github.com/bluesky-social/indigo/api/bsky"
"github.com/bluesky-social/indigo/atproto/syntax"
"github.com/labstack/echo/v4"
)
// Sentinel errors reported by getBlueskyPost; HTTP handlers translate them
// into response status codes.
var (
	// ErrPostNotFound means the post could not be fetched or the response
	// did not contain a post view.
	ErrPostNotFound = errors.New("post not found")

	// ErrPostNotPublic means the post exists but must not be shown to
	// unauthenticated viewers.
	ErrPostNotPublic = errors.New("post is not publicly accessible")
)
// getBlueskyPost fetches the post view for the app.bsky.feed.post record
// identified by did and rkey, using the server's XRPC client.
//
// It returns ErrPostNotFound when the fetch fails or the response carries no
// thread view / post view, and ErrPostNotPublic when the thread is a blocked
// post or the author has self-applied the "!no-unauthenticated" label.
func (srv *Server) getBlueskyPost(ctx context.Context, did syntax.DID, rkey syntax.RecordKey) (*appbsky.FeedDefs_PostView, error) {
	// fetch the post (with extra context)
	uri := fmt.Sprintf("at://%s/app.bsky.feed.post/%s", did, rkey)
	tpv, err := appbsky.FeedGetPostThread(ctx, srv.xrpcc, 1, 0, uri)
	if err != nil {
		log.Warnf("failed to fetch post: %s\t%v", uri, err)
		// TODO: detect 404, specifically?
		return nil, ErrPostNotFound
	}
	if tpv == nil || tpv.Thread == nil {
		return nil, ErrPostNotFound
	}
	if tpv.Thread.FeedDefs_BlockedPost != nil {
		return nil, ErrPostNotPublic
	}

	// The thread is a union: when it holds neither a blocked post nor a
	// thread view, FeedDefs_ThreadViewPost is nil and must not be
	// dereferenced.
	threadView := tpv.Thread.FeedDefs_ThreadViewPost
	if threadView == nil || threadView.Post == nil {
		return nil, ErrPostNotFound
	}
	postView := threadView.Post
	if postView.Author == nil {
		// Callers rely on the author being present; treat a malformed
		// response like a missing post rather than panicking.
		return nil, ErrPostNotFound
	}

	// Only a label the author applied to themselves opts the post out of
	// unauthenticated display.
	for _, label := range postView.Author.Labels {
		if label.Src == postView.Author.Did && label.Val == "!no-unauthenticated" {
			return nil, ErrPostNotPublic
		}
	}
	return postView, nil
}
// WebHome renders the "home.html" template with no template data and a
// 200 status.
func (srv *Server) WebHome(c echo.Context) error {
	const page = "home.html"
	return c.Render(http.StatusOK, page, nil)
}
// OEmbedResponse is the JSON body returned by the /oembed endpoint.
//
// Type and Version are always emitted; every other field is omitted from the
// JSON output when it holds its zero value (or, for Height, a nil pointer).
type OEmbedResponse struct {
	Type       string `json:"type"`
	Version    string `json:"version"`
	AuthorName string `json:"author_name,omitempty"`
	AuthorURL  string `json:"author_url,omitempty"`
	// ProviderName carries the provider's display name, so it is serialized
	// as "provider_name" (it was previously mis-tagged as "provider_url").
	ProviderName string `json:"provider_name,omitempty"`
	CacheAge     int    `json:"cache_age,omitempty"`
	Width        int    `json:"width,omitempty"`
	Height       *int   `json:"height,omitempty"`
	HTML         string `json:"html,omitempty"`
}
// parseBlueskyURL turns raw into an AT-URI.
//
// raw may already be an AT-URI, in which case it is returned as parsed.
// Otherwise it must be a URL on host "bsky.app" whose path has the form
// /profile/<handle-or-DID>/post/<rkey>; a handle is resolved to a DID through
// the server's identity directory. Anything else yields an error.
func (srv *Server) parseBlueskyURL(ctx context.Context, raw string) (*syntax.ATURI, error) {
	if len(raw) == 0 {
		return nil, fmt.Errorf("empty url")
	}

	// A plain AT-URI needs no further work.
	if direct, err := syntax.ParseATURI(raw); err == nil {
		return &direct, nil
	}

	// Fall back to interpreting the input as a bsky.app post URL.
	webURL, err := url.Parse(raw)
	if err != nil {
		return nil, err
	}
	if webURL.Hostname() != "bsky.app" {
		return nil, fmt.Errorf("only bsky.app URLs currently supported")
	}

	// The path starts with "/", so segments[0] is always the empty string.
	segments := strings.Split(webURL.Path, "/")
	isPostPath := len(segments) == 5 && segments[1] == "profile" && segments[3] == "post"
	if !isPostPath {
		return nil, fmt.Errorf("only bsky.app post URLs currently supported")
	}

	atid, err := syntax.ParseAtIdentifier(segments[2])
	if err != nil {
		return nil, err
	}
	rkey, err := syntax.ParseRecordKey(segments[4])
	if err != nil {
		return nil, err
	}

	var did syntax.DID
	switch {
	case atid.IsHandle():
		ident, err := srv.dir.Lookup(ctx, *atid)
		if err != nil {
			return nil, err
		}
		did = ident.DID
	default:
		did, err = atid.AsDID()
		if err != nil {
			return nil, err
		}
	}

	// TODO: don't really need to re-parse here, if we had test coverage
	rebuilt, err := syntax.ParseATURI(fmt.Sprintf("at://%s/app.bsky.feed.post/%s", did, rkey))
	if err != nil {
		return nil, err
	}
	return &rebuilt, nil
}
// WebOEmbed handles oEmbed requests for Bluesky posts.
//
// Query parameters:
//   - format: optional; only "json" is supported (501 otherwise).
//   - maxwidth: optional integer in [220, 550]; defaults to 550 (400 if invalid).
//   - url: a bsky.app post URL or an AT-URI naming an app.bsky.feed.post record.
//
// Responses: 400 for an unusable url, 501 for non-post records, 404 / 403
// when the post is missing / not public, 500 on internal failures, and 200
// with an OEmbedResponse JSON body on success.
func (srv *Server) WebOEmbed(c echo.Context) error {
	ctx := c.Request().Context()

	formatParam := c.QueryParam("format")
	if formatParam != "" && formatParam != "json" {
		return c.String(http.StatusNotImplemented, "Unsupported oEmbed format: "+formatParam)
	}

	// TODO: do we actually do something with width?
	width := 550
	if maxWidthParam := c.QueryParam("maxwidth"); maxWidthParam != "" {
		maxWidthInt, err := strconv.Atoi(maxWidthParam)
		if err != nil || maxWidthInt < 220 || maxWidthInt > 550 {
			return c.String(http.StatusBadRequest, "Invalid maxwidth (expected integer between 220 and 550)")
		}
		width = maxWidthInt
	}
	// NOTE: maxheight ignored

	aturi, err := srv.parseBlueskyURL(ctx, c.QueryParam("url"))
	if err != nil {
		return c.String(http.StatusBadRequest, fmt.Sprintf("Expected 'url' to be bsky.app URL or AT-URI: %v", err))
	}
	if aturi.Collection() != syntax.NSID("app.bsky.feed.post") {
		return c.String(http.StatusNotImplemented, "Only posts (app.bsky.feed.post records) can be embedded currently")
	}
	did, err := aturi.Authority().AsDID()
	if err != nil {
		// An AT-URI supplied directly by the client may carry a non-DID
		// authority. That is bad input, not a server fault, so answer 400
		// instead of letting it surface as a 500.
		return c.String(http.StatusBadRequest, fmt.Sprintf("Expected AT-URI authority to be a DID: %v", err))
	}

	post, err := srv.getBlueskyPost(ctx, did, aturi.RecordKey())
	switch {
	case errors.Is(err, ErrPostNotFound):
		return c.String(http.StatusNotFound, fmt.Sprintf("%v", err))
	case errors.Is(err, ErrPostNotPublic):
		return c.String(http.StatusForbidden, fmt.Sprintf("%v", err))
	case err != nil:
		return c.String(http.StatusInternalServerError, fmt.Sprintf("%v", err))
	}

	html, err := srv.postEmbedHTML(post)
	if err != nil {
		return c.String(http.StatusInternalServerError, fmt.Sprintf("%v", err))
	}

	data := OEmbedResponse{
		Type:         "rich",
		Version:      "1.0",
		AuthorName:   "@" + post.Author.Handle,
		AuthorURL:    fmt.Sprintf("https://bsky.app/profile/%s", post.Author.Handle),
		ProviderName: "Bluesky Social",
		CacheAge:     86400,
		Width:        width,
		Height:       nil,
		HTML:         html,
	}
	// Prefer "Display Name (@handle)" when the author has a display name.
	if post.Author.DisplayName != nil {
		data.AuthorName = fmt.Sprintf("%s (@%s)", *post.Author.DisplayName, post.Author.Handle)
	}
	return c.JSON(http.StatusOK, data)
}
// WebPostEmbed serves the embed page for a single post.
//
// The ":rkey" and ":did" path parameters are only checked for syntax (400 on
// failure, record key first); the parsed values are not used further. The
// post itself is not fetched here: per the original note, that request wasn't
// really necessary because the page's JS performs the same fetch.
func (srv *Server) WebPostEmbed(c echo.Context) error {
	if _, err := syntax.ParseRecordKey(c.Param("rkey")); err != nil {
		return c.String(http.StatusBadRequest, fmt.Sprintf("Invalid RecordKey: %v", err))
	}
	if _, err := syntax.ParseDID(c.Param("did")); err != nil {
		return c.String(http.StatusBadRequest, fmt.Sprintf("Invalid DID: %v", err))
	}
	return c.Render(http.StatusOK, "postEmbed.html", nil)
}

View file

@ -0,0 +1,60 @@
package main
import (
"os"
_ "github.com/joho/godotenv/autoload"
logging "github.com/ipfs/go-log"
"github.com/urfave/cli/v2"
)
// log is the package-level logger, created under the name "embedr".
var log = logging.Logger("embedr")
// init sets every logger to debug level when the package is initialized.
func init() {
	// Use logging.LevelWarn here instead for quieter output.
	verbosity := logging.LevelDebug
	logging.SetAllLoggers(verbosity)
}
// main is the program entry point; it passes the process arguments to run.
func main() {
run(os.Args)
}
// run builds the "embedr" CLI application and executes it with args
// (args[0] is the program name, as in os.Args).
//
// The only command is "serve", which starts the web server. If the
// application returns an error, it is printed to standard error and the
// process exits with status 1.
func run(args []string) {
	app := cli.App{
		Name:  "embedr",
		Usage: "web server for embed.bsky.app post embeds",
		Commands: []*cli.Command{
			{
				Name:   "serve",
				Usage:  "run the server",
				Action: serve,
				Flags: []cli.Flag{
					&cli.StringFlag{
						Name:    "appview-host",
						Usage:   "method, hostname, and port of AppView instance",
						Value:   "https://public.api.bsky.app",
						EnvVars: []string{"ATP_APPVIEW_HOST"},
					},
					&cli.StringFlag{
						Name:     "http-address",
						Usage:    "Specify the local IP/port to bind to",
						Required: false,
						Value:    ":8100",
						EnvVars:  []string{"HTTP_ADDRESS"},
					},
					&cli.BoolFlag{
						Name:     "debug",
						Usage:    "Enable debug mode",
						Value:    false,
						Required: false,
						EnvVars:  []string{"DEBUG"},
					},
				},
			},
		},
	}

	// Run with the arguments we were given; RunAndExitOnError is deprecated
	// and always reads os.Args, which silently ignored the args parameter.
	if err := app.Run(args); err != nil {
		// Nothing useful can be done if writing to stderr itself fails.
		_, _ = os.Stderr.WriteString(err.Error() + "\n")
		os.Exit(1)
	}
}

View file

@ -0,0 +1,16 @@
package main
import (
"html/template"
"io"
"github.com/labstack/echo/v4"
)
// Template wraps a parsed html/template set so it can be used as the echo
// renderer (see its Render method).
type Template struct {
// templates is the template set that Render executes by name.
templates *template.Template
}
// Render executes the template called name from the wrapped set, writing the
// output for data to w. The echo context c is not used.
func (t *Template) Render(w io.Writer, name string, data interface{}, c echo.Context) error {
	set := t.templates
	return set.ExecuteTemplate(w, name, data)
}

View file

@ -0,0 +1,236 @@
package main
import (
"context"
"errors"
"fmt"
"html/template"
"io/fs"
"net/http"
"os"
"os/signal"
"strings"
"syscall"
"time"
"github.com/bluesky-social/indigo/atproto/identity"
"github.com/bluesky-social/indigo/util/cliutil"
"github.com/bluesky-social/indigo/xrpc"
"github.com/bluesky-social/social-app/bskyweb"
"github.com/klauspost/compress/gzhttp"
"github.com/klauspost/compress/gzip"
"github.com/labstack/echo/v4"
"github.com/labstack/echo/v4/middleware"
"github.com/urfave/cli/v2"
)
// Server bundles the state shared by the embedr HTTP handlers.
type Server struct {
// echo is the router that ServeHTTP delegates every request to.
echo *echo.Echo
// httpd is the underlying HTTP server; Shutdown stops it.
httpd *http.Server
// xrpcc is the XRPC client used to fetch posts.
xrpcc *xrpc.Client
// dir resolves handles to identities when parsing bsky.app URLs.
dir identity.Directory
}
// serve is the action of the "serve" CLI command: it builds the echo router,
// middleware, and routes, starts the HTTP server on the configured address,
// and blocks until the process receives SIGINT or SIGTERM (then shuts down
// gracefully and returns nil) or the HTTP server stops unexpectedly (then
// returns that error, e.g. when the listen address is already in use).
//
// Flags read from cctx: "debug" (serve static files from the local
// "embedr-static" directory instead of the embedded FS), "http-address", and
// "appview-host".
func serve(cctx *cli.Context) error {
	debug := cctx.Bool("debug")
	httpAddress := cctx.String("http-address")
	appviewHost := cctx.String("appview-host")

	// Echo
	e := echo.New()

	// create a new session (no auth)
	xrpcc := &xrpc.Client{
		Client: cliutil.NewHttpClient(),
		Host:   appviewHost,
	}

	// httpd
	var (
		httpTimeout          = 2 * time.Minute
		httpMaxHeaderBytes   = 2 * (1024 * 1024)
		gzipMinSizeBytes     = 1024 * 2
		gzipCompressionLevel = gzip.BestSpeed
		gzipExceptMIMETypes  = []string{"image/png"}
	)

	// Wrap the server handler in a gzip handler to compress larger responses.
	gzipHandler, err := gzhttp.NewWrapper(
		gzhttp.MinSize(gzipMinSizeBytes),
		gzhttp.CompressionLevel(gzipCompressionLevel),
		gzhttp.ExceptContentTypes(gzipExceptMIMETypes),
	)
	if err != nil {
		return err
	}

	//
	// server
	//
	server := &Server{
		echo:  e,
		xrpcc: xrpcc,
		dir:   identity.DefaultDirectory(),
	}

	// Create the HTTP server.
	server.httpd = &http.Server{
		Handler:        gzipHandler(server),
		Addr:           httpAddress,
		WriteTimeout:   httpTimeout,
		ReadTimeout:    httpTimeout,
		MaxHeaderBytes: httpMaxHeaderBytes,
	}

	e.HideBanner = true

	tmpl := &Template{
		templates: template.Must(template.ParseFS(bskyweb.EmbedrTemplateFS, "embedr-templates/*.html")),
	}
	e.Renderer = tmpl
	e.HTTPErrorHandler = server.errorHandler

	e.IPExtractor = echo.ExtractIPFromXFFHeader()

	// SECURITY: Do not modify without due consideration.
	e.Use(middleware.SecureWithConfig(middleware.SecureConfig{
		ContentTypeNosniff: "nosniff",
		// disable XFrameOptions; we're embedding here!
		HSTSMaxAge: 31536000, // 365 days
		// TODO:
		// ContentSecurityPolicy
		// XSSProtection
	}))
	e.Use(middleware.LoggerWithConfig(middleware.LoggerConfig{
		// Don't log requests for static content.
		Skipper: func(c echo.Context) bool {
			return strings.HasPrefix(c.Request().URL.Path, "/static")
		},
	}))
	e.Use(middleware.RateLimiterWithConfig(middleware.RateLimiterConfig{
		Skipper: middleware.DefaultSkipper,
		Store: middleware.NewRateLimiterMemoryStoreWithConfig(
			middleware.RateLimiterMemoryStoreConfig{
				Rate:      10,              // requests per second
				Burst:     30,              // allow bursts
				ExpiresIn: 3 * time.Minute, // garbage collect entries older than 3 minutes
			},
		),
		IdentifierExtractor: func(ctx echo.Context) (string, error) {
			id := ctx.RealIP()
			return id, nil
		},
		DenyHandler: func(c echo.Context, identifier string, err error) error {
			return c.String(http.StatusTooManyRequests, "Your request has been rate limited. Please try again later. Contact support@bsky.app if you believe this was a mistake.\n")
		},
	}))

	// redirect trailing slash to non-trailing slash.
	// all of our current endpoints have no trailing slash.
	e.Use(middleware.RemoveTrailingSlashWithConfig(middleware.TrailingSlashConfig{
		RedirectCode: http.StatusFound,
	}))

	//
	// configure routes
	//
	// static files
	staticHandler := http.FileServer(func() http.FileSystem {
		if debug {
			log.Debugf("serving static file from the local file system")
			return http.FS(os.DirFS("embedr-static"))
		}
		fsys, err := fs.Sub(bskyweb.EmbedrStaticFS, "embedr-static")
		if err != nil {
			log.Fatal(err)
		}
		return http.FS(fsys)
	}())

	e.GET("/robots.txt", echo.WrapHandler(staticHandler))
	e.GET("/ips-v4", echo.WrapHandler(staticHandler))
	e.GET("/ips-v6", echo.WrapHandler(staticHandler))
	e.GET("/.well-known/*", echo.WrapHandler(staticHandler))
	e.GET("/security.txt", func(c echo.Context) error {
		return c.Redirect(http.StatusMovedPermanently, "/.well-known/security.txt")
	})
	e.GET("/static/*", echo.WrapHandler(http.StripPrefix("/static/", staticHandler)), func(next echo.HandlerFunc) echo.HandlerFunc {
		return func(c echo.Context) error {
			path := c.Request().URL.Path
			maxAge := 1 * (60 * 60) // default is 1 hour

			// Cache javascript and images files for 1 week, which works because
			// they're always versioned (e.g. /static/js/main.64c14927.js)
			if strings.HasPrefix(path, "/static/js/") || strings.HasPrefix(path, "/static/images/") {
				maxAge = 7 * (60 * 60 * 24) // 1 week
			}

			c.Response().Header().Set("Cache-Control", fmt.Sprintf("public, max-age=%d", maxAge))
			return next(c)
		}
	})

	// actual routes
	e.GET("/", server.WebHome)
	e.GET("/iframe-resize.js", echo.WrapHandler(staticHandler))
	e.GET("/embed.js", echo.WrapHandler(staticHandler))
	e.GET("/oembed", server.WebOEmbed)
	e.GET("/embed/:did/app.bsky.feed.post/:rkey", server.WebPostEmbed)

	// Start the server. An unexpected stop (for example a failed bind) is
	// reported through serveErr so this function can return instead of
	// waiting forever for a signal while nothing is being served.
	log.Infof("starting server address=%s", httpAddress)
	serveErr := make(chan error, 1) // buffered: the goroutine must never block
	go func() {
		if err := server.httpd.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
			log.Errorf("HTTP server shutting down unexpectedly: %s", err)
			serveErr <- err
		}
	}()

	// Wait for a signal to exit.
	log.Info("registering OS exit signal handler")
	exitSignals := make(chan os.Signal, 1)
	signal.Notify(exitSignals, syscall.SIGINT, syscall.SIGTERM)
	defer signal.Stop(exitSignals)

	select {
	case err := <-serveErr:
		return err
	case sig := <-exitSignals:
		log.Infof("received OS exit signal: %s", sig)

		// Shut down the HTTP server.
		if err := server.Shutdown(); err != nil {
			log.Errorf("HTTP server shutdown error: %s", err)
		}
	}

	log.Infof("graceful shutdown complete")
	return nil
}
// ServeHTTP makes Server an http.Handler by forwarding every request to the
// echo router.
func (srv *Server) ServeHTTP(rw http.ResponseWriter, req *http.Request) {
	router := srv.echo
	router.ServeHTTP(rw, req)
}
// Shutdown stops the HTTP server, waiting at most ten seconds, and returns
// whatever error the underlying http.Server reports.
func (srv *Server) Shutdown() error {
	log.Info("shutting down")

	const gracePeriod = 10 * time.Second
	deadlineCtx, release := context.WithTimeout(context.Background(), gracePeriod)
	defer release()

	return srv.httpd.Shutdown(deadlineCtx)
}
// errorHandler is installed as the echo HTTP error handler. It logs err and
// renders "error.html" with the status code taken from an *echo.HTTPError
// anywhere in err's chain, or 500 for any other error.
func (srv *Server) errorHandler(err error, c echo.Context) {
	code := http.StatusInternalServerError
	var httpErr *echo.HTTPError
	if errors.As(err, &httpErr) {
		code = httpErr.Code
	}
	c.Logger().Error(err)

	// Once the response has been committed, a second status line or body
	// cannot be written; logging the error is all that is left to do.
	if c.Response().Committed {
		return
	}

	data := map[string]interface{}{
		"statusCode": code,
	}
	if renderErr := c.Render(code, "error.html", data); renderErr != nil {
		// Do not drop a failure to render the error page itself.
		c.Logger().Error(renderErr)
	}
}

View file

@ -0,0 +1,71 @@
package main
import (
"bytes"
"fmt"
"html/template"
appbsky "github.com/bluesky-social/indigo/api/bsky"
"github.com/bluesky-social/indigo/atproto/syntax"
)
// snippetTemplate is the embed snippet markup. It is parsed once at package
// initialization rather than on every request; html/template escapes every
// interpolated value according to its HTML context.
var snippetTemplate = template.Must(template.New("snippet").Parse(`<blockquote class="bluesky-embed" data-bluesky-uri="{{ .PostURI }}" data-bluesky-cid="{{ .PostCID }}"><p{{ if .PostLang }} lang="{{ .PostLang }}"{{ end }}>{{ .PostText }}</p>&mdash; {{ .PostAuthor }} {{ .PostIndexedAt }}</blockquote><script async src="{{ .WidgetURL }}" charset="utf-8"></script>`))

// postEmbedHTML renders the HTML embed snippet (a <blockquote> plus the
// widget <script> tag) for postView.
//
// It returns an error when the post URI is not a valid AT-URI, when the view
// has no record or the record is not an app.bsky.feed.post, or when template
// execution fails. The returned string is empty whenever the error is
// non-nil.
func (srv *Server) postEmbedHTML(postView *appbsky.FeedDefs_PostView) (string, error) {
	// ensure that there isn't an injection from the URI
	if _, err := syntax.ParseATURI(postView.Uri); err != nil {
		log.Errorf("bad AT-URI in response: uri=%q err=%v", postView.Uri, err)
		return "", err
	}

	if postView.Record == nil {
		log.Errorf("post view has no record: uri=%q", postView.Uri)
		return "", fmt.Errorf("post view has no record")
	}
	post, ok := postView.Record.Val.(*appbsky.FeedPost)
	if !ok {
		// A failed type assertion has no error of its own; build one so the
		// caller never gets an empty snippet together with a nil error.
		log.Errorf("bad post record value: uri=%q type=%T", postView.Uri, postView.Record.Val)
		return "", fmt.Errorf("post record is not an app.bsky.feed.post (got %T)", postView.Record.Val)
	}

	// Only the first declared language is emitted as the lang attribute.
	var lang string
	if len(post.Langs) > 0 {
		lang = post.Langs[0]
	}

	var authorName string
	if postView.Author.DisplayName != nil {
		authorName = fmt.Sprintf("%s (@%s)", *postView.Author.DisplayName, postView.Author.Handle)
	} else {
		authorName = fmt.Sprintf("@%s", postView.Author.Handle)
	}

	data := struct {
		PostURI       template.URL
		PostCID       string
		PostLang      string
		PostText      string
		PostAuthor    string
		PostIndexedAt string
		WidgetURL     template.URL
	}{
		// template.URL marks the value as trusted URL content; the URI was
		// validated as an AT-URI above.
		PostURI:       template.URL(postView.Uri),
		PostCID:       postView.Cid,
		PostLang:      lang,
		PostText:      post.Text,
		PostAuthor:    authorName,
		PostIndexedAt: postView.IndexedAt, // TODO: createdAt?
		WidgetURL:     template.URL("https://embed.bsky.app/static/embed.js"),
	}

	var buf bytes.Buffer
	if err := snippetTemplate.Execute(&buf, data); err != nil {
		log.Errorf("template execute error: %v", err)
		return "", err
	}
	return buf.String(), nil
}