repos / pgit

static site generator for git
git clone https://github.com/picosh/pgit.git

commit
ce9094e
parent
9365859
author
Eric Bower
date
2023-08-15 10:13:04 -0400 EDT
refactor: perf improvements
1 file changed, +234, -187
M main.go
+234, -187
  1@@ -13,6 +13,7 @@ import (
  2 	"path/filepath"
  3 	"sort"
  4 	"strings"
  5+	"sync"
  6 	"unicode/utf8"
  7 
  8 	"github.com/alecthomas/chroma"
  9@@ -61,6 +62,8 @@ type Config struct {
 10 	// computed
 11 	// cache for skipping commits, trees, etc.
 12 	Cache map[string]bool
 13+	// mutex for Cache
 14+	Mutex sync.RWMutex
 15 	// pretty name for the repo
 16 	RepoName string
 17 	// logger
 18@@ -107,6 +110,7 @@ type CommitData struct {
 19 	WhenStr    string
 20 	AuthorStr  string
 21 	ShortID    string
 22+	ParentID   string
 23 	Refs       []*RefInfo
 24 	*git.Commit
 25 }
 26@@ -301,31 +305,6 @@ func readmeFile(repo *Config) string {
 27 	return strings.ToLower(repo.Readme)
 28 }
 29 
 30-func walkTree(tree *git.Tree, revData *RevData, curpath string, aggregate []*TreeItem) []*TreeItem {
 31-	entries, err := tree.Entries()
 32-	bail(err)
 33-
 34-	for _, entry := range entries {
 35-		fname := filepath.Join(curpath, entry.Name())
 36-		typ := entry.Type()
 37-		if typ == git.ObjectTree {
 38-			re, _ := tree.Subtree(entry.Name())
 39-			aggregate = walkTree(re, revData, fname, aggregate)
 40-		}
 41-
 42-		if entry.Type() == git.ObjectBlob {
 43-			aggregate = append(aggregate, &TreeItem{
 44-				Size:  toPretty(entry.Size()),
 45-				Path:  fname,
 46-				Entry: entry,
 47-				URL:   template.URL(getFileURL(revData, fname)),
 48-			})
 49-		}
 50-	}
 51-
 52-	return aggregate
 53-}
 54-
 55 func (c *Config) writeHtml(writeData *WriteData) {
 56 	ts, err := template.ParseFS(
 57 		efs,
 58@@ -357,6 +336,7 @@ func (c *Config) copyStatic(dst string, data []byte) {
 59 }
 60 
 61 func (c *Config) writeRootSummary(data *PageData, readme template.HTML) {
 62+	c.Logger.Infof("writing root html (%s)", c.RepoPath)
 63 	c.writeHtml(&WriteData{
 64 		Filename: "index.html",
 65 		Template: "html/summary.page.tmpl",
 66@@ -368,6 +348,7 @@ func (c *Config) writeRootSummary(data *PageData, readme template.HTML) {
 67 }
 68 
 69 func (c *Config) writeTree(data *PageData, tree []*TreeItem) {
 70+	c.Logger.Infof("writing tree (%s)", data.RevData.Name())
 71 	c.writeHtml(&WriteData{
 72 		Filename: "index.html",
 73 		Subdir:   getTreeBaseDir(data.RevData),
 74@@ -380,6 +361,7 @@ func (c *Config) writeTree(data *PageData, tree []*TreeItem) {
 75 }
 76 
 77 func (c *Config) writeLog(data *PageData, logs []*CommitData) {
 78+	c.Logger.Infof("writing log file (%s)", data.RevData.Name())
 79 	c.writeHtml(&WriteData{
 80 		Filename: "index.html",
 81 		Subdir:   getLogBaseDir(data.RevData),
 82@@ -392,6 +374,7 @@ func (c *Config) writeLog(data *PageData, logs []*CommitData) {
 83 }
 84 
 85 func (c *Config) writeRefs(data *PageData, refs []*RefInfo) {
 86+	c.Logger.Infof("writing refs (%s)", c.RepoPath)
 87 	c.writeHtml(&WriteData{
 88 		Filename: "refs.html",
 89 		Template: "html/refs.page.tmpl",
 90@@ -402,125 +385,114 @@ func (c *Config) writeRefs(data *PageData, refs []*RefInfo) {
 91 	})
 92 }
 93 
 94-func (c *Config) writeHTMLTreeFiles(pageData *PageData, tree []*TreeItem) string {
 95+func (c *Config) writeHTMLTreeFile(pageData *PageData, treeItem *TreeItem) string {
 96 	readme := ""
 97-	for _, file := range tree {
 98-		b, err := file.Entry.Blob().Bytes()
 99-		bail(err)
100-		str := string(b)
101-
102-		file.IsTextFile = isTextFile(str)
103+	b, err := treeItem.Entry.Blob().Bytes()
104+	bail(err)
105+	str := string(b)
106 
107-		contents := "binary file, cannot display"
108-		if file.IsTextFile {
109-			file.NumLines = len(strings.Split(str, "\n"))
110-			contents, err = parseText(file.Entry.Name(), string(b), c.Theme)
111-			bail(err)
112-		}
113+	treeItem.IsTextFile = isTextFile(str)
114 
115-		d := filepath.Dir(file.Path)
116+	contents := "binary file, cannot display"
117+	if treeItem.IsTextFile {
118+		treeItem.NumLines = len(strings.Split(str, "\n"))
119+		contents, err = parseText(treeItem.Entry.Name(), string(b), c.Theme)
120+		bail(err)
121+	}
122 
123-		nameLower := strings.ToLower(file.Entry.Name())
124-		summary := readmeFile(pageData.Repo)
125-		if nameLower == summary {
126-			readme = contents
127-		}
128+	d := filepath.Dir(treeItem.Path)
129 
130-		c.writeHtml(&WriteData{
131-			Filename: fmt.Sprintf("%s.html", file.Entry.Name()),
132-			Template: "html/file.page.tmpl",
133-			Data: &FilePageData{
134-				PageData: pageData,
135-				Contents: template.HTML(contents),
136-				Path:     file.Path,
137-			},
138-			Subdir: getFileURL(pageData.RevData, d),
139-		})
140+	nameLower := strings.ToLower(treeItem.Entry.Name())
141+	summary := readmeFile(pageData.Repo)
142+	if nameLower == summary {
143+		readme = contents
144 	}
145+
146+	c.writeHtml(&WriteData{
147+		Filename: fmt.Sprintf("%s.html", treeItem.Entry.Name()),
148+		Template: "html/file.page.tmpl",
149+		Data: &FilePageData{
150+			PageData: pageData,
151+			Contents: template.HTML(contents),
152+			Path:     treeItem.Path,
153+		},
154+		Subdir: getFileURL(pageData.RevData, d),
155+	})
156 	return readme
157 }
158 
159-func (c *Config) writeLogDiffs(repo *git.Repository, pageData *PageData, logs []*CommitData) {
160-	for _, commit := range logs {
161-		commitID := commit.ID.String()
162+func (c *Config) writeLogDiff(repo *git.Repository, pageData *PageData, commit *CommitData) {
163+	commitID := commit.ID.String()
164 
165-		if c.Cache[commitID] {
166-			c.Logger.Infof("(%s) commit file already generated, skipping", getShortID(commitID))
167-			continue
168-		} else {
169-			c.Cache[commitID] = true
170-		}
171+	c.Mutex.RLock()
172+	hasCommit := c.Cache[commitID]
173+	c.Mutex.RUnlock()
174 
175-		ancestors, err := commit.Ancestors()
176-		bail(err)
177+	if hasCommit {
178+		c.Logger.Infof("(%s) commit file already generated, skipping", getShortID(commitID))
179+		return
180+	} else {
181+		c.Mutex.Lock()
182+		c.Cache[commitID] = true
183+		c.Mutex.Unlock()
184+	}
185 
186-		// if no ancestors exist then we are at initial commit
187-		parent := commit
188-		if len(ancestors) > 0 {
189-			pt := ancestors[0]
190-			parent = &CommitData{
191-				Commit: pt,
192-				URL:    getCommitURL(pt.ID.String()),
193-			}
194-		}
195-		parentID := parent.ID.String()
196-
197-		diff, err := repo.Diff(
198-			commitID,
199-			0,
200-			0,
201-			0,
202-			git.DiffOptions{Base: parentID},
203-		)
204-
205-		rnd := &DiffRender{
206-			NumFiles:       diff.NumFiles(),
207-			TotalAdditions: diff.TotalAdditions(),
208-			TotalDeletions: diff.TotalDeletions(),
209-		}
210-		fls := []*DiffRenderFile{}
211-		for _, file := range diff.Files {
212-			fl := &DiffRenderFile{
213-				FileType:     diffFileType(file.Type),
214-				OldMode:      file.OldMode(),
215-				OldName:      file.OldName(),
216-				Mode:         file.Mode(),
217-				Name:         file.Name,
218-				NumAdditions: file.NumAdditions(),
219-				NumDeletions: file.NumDeletions(),
220-			}
221-			content := ""
222-			for _, section := range file.Sections {
223-				for _, line := range section.Lines {
224-					content += fmt.Sprintf("%s\n", line.Content)
225-				}
226-			}
227-			// set filename to something our `ParseText` recognizes (e.g. `.diff`)
228-			finContent, err := parseText("commit.diff", content, c.Theme)
229-			bail(err)
230+	diff, err := repo.Diff(
231+		commitID,
232+		0,
233+		0,
234+		0,
235+		git.DiffOptions{},
236+	)
237+	bail(err)
238 
239-			fl.Content = template.HTML(finContent)
240-			fls = append(fls, fl)
241+	rnd := &DiffRender{
242+		NumFiles:       diff.NumFiles(),
243+		TotalAdditions: diff.TotalAdditions(),
244+		TotalDeletions: diff.TotalDeletions(),
245+	}
246+	fls := []*DiffRenderFile{}
247+	for _, file := range diff.Files {
248+		fl := &DiffRenderFile{
249+			FileType:     diffFileType(file.Type),
250+			OldMode:      file.OldMode(),
251+			OldName:      file.OldName(),
252+			Mode:         file.Mode(),
253+			Name:         file.Name,
254+			NumAdditions: file.NumAdditions(),
255+			NumDeletions: file.NumDeletions(),
256 		}
257-		rnd.Files = fls
258-
259-		commitData := &CommitPageData{
260-			PageData:  pageData,
261-			Commit:    commit,
262-			CommitID:  getShortID(commitID),
263-			Diff:      rnd,
264-			Parent:    getShortID(parentID),
265-			CommitURL: getCommitURL(commitID),
266-			ParentURL: getCommitURL(parentID),
267+		content := ""
268+		for _, section := range file.Sections {
269+			for _, line := range section.Lines {
270+				content += fmt.Sprintf("%s\n", line.Content)
271+			}
272 		}
273+		// set filename to something our `ParseText` recognizes (e.g. `.diff`)
274+		finContent, err := parseText("commit.diff", content, c.Theme)
275+		bail(err)
276 
277-		c.writeHtml(&WriteData{
278-			Filename: fmt.Sprintf("%s.html", commitID),
279-			Template: "html/commit.page.tmpl",
280-			Subdir:   "commits",
281-			Data:     commitData,
282-		})
283+		fl.Content = template.HTML(finContent)
284+		fls = append(fls, fl)
285+	}
286+	rnd.Files = fls
287+
288+	commitData := &CommitPageData{
289+		PageData:  pageData,
290+		Commit:    commit,
291+		CommitID:  getShortID(commitID),
292+		Diff:      rnd,
293+		Parent:    getShortID(commit.ParentID),
294+		CommitURL: getCommitURL(commitID),
295+		ParentURL: getCommitURL(commit.ParentID),
296 	}
297+
298+	c.writeHtml(&WriteData{
299+		Filename: fmt.Sprintf("%s.html", commitID),
300+		Template: "html/commit.page.tmpl",
301+		Subdir:   "commits",
302+		Data:     commitData,
303+	})
304 }
305 
306 func getSummaryURL() template.URL {
307@@ -585,6 +557,7 @@ func getShortID(id string) string {
308 }
309 
310 func (c *Config) writeRepo() *BranchOutput {
311+	c.Logger.Infof("Writing repo (%s)", c.RepoPath)
312 	repo, err := git.Open(c.RepoPath)
313 	bail(err)
314 
315@@ -664,6 +637,7 @@ func (c *Config) writeRepo() *BranchOutput {
316 	})
317 
318 	for _, revData := range revs {
319+		c.Logger.Infof("Writing revision (%s)", revData.Name())
320 		data := &PageData{
321 			Repo:     c,
322 			RevData:  revData,
323@@ -698,6 +672,37 @@ func (c *Config) writeRepo() *BranchOutput {
324 	return mainOutput
325 }
326 
327+type TreeWalker struct {
328+	revData  *RevData
329+	treeItem chan *TreeItem
330+}
331+
332+func (tw *TreeWalker) walk(tree *git.Tree, curpath string) {
333+	entries, err := tree.Entries()
334+	bail(err)
335+
336+	for _, entry := range entries {
337+		fname := filepath.Join(curpath, entry.Name())
338+		typ := entry.Type()
339+
340+		if typ == git.ObjectTree {
341+			re, _ := tree.Subtree(entry.Name())
342+			tw.walk(re, fname)
343+		} else if typ == git.ObjectBlob {
344+			tw.treeItem <- &TreeItem{
345+				Size:  toPretty(entry.Size()),
346+				Path:  fname,
347+				Entry: entry,
348+				URL:   template.URL(getFileURL(tw.revData, fname)),
349+			}
350+		}
351+	}
352+
353+	if curpath == "" {
354+		close(tw.treeItem)
355+	}
356+}
357+
358 func (c *Config) writeRevision(repo *git.Repository, pageData *PageData, refs []*RefInfo) *BranchOutput {
359 	c.Logger.Infof(
360 		"compiling (%s) revision (%s)",
361@@ -706,89 +711,131 @@ func (c *Config) writeRevision(repo *git.Repository, pageData *PageData, refs []
362 	)
363 
364 	output := &BranchOutput{}
365-	pageSize := pageData.Repo.MaxCommits
366-	if pageSize == 0 {
367-		pageSize = 5000
368-	}
369 
370-	commits, err := repo.CommitsByPage(pageData.RevData.ID(), 0, pageSize)
371-	bail(err)
372+	var wg sync.WaitGroup
373 
374-	logs := []*CommitData{}
375-	for i, commit := range commits {
376-		if i == 0 {
377-			output.LastCommit = commit
378+	wg.Add(1)
379+	go func() {
380+		defer wg.Done()
381+
382+		pageSize := pageData.Repo.MaxCommits
383+		if pageSize == 0 {
384+			pageSize = 5000
385 		}
386+		fmt.Println("grabbing commits")
387+		commits, err := repo.CommitsByPage(pageData.RevData.ID(), 0, pageSize)
388+		fmt.Println("got commits")
389+		bail(err)
390 
391-		tags := []*RefInfo{}
392-		for _, ref := range refs {
393-			if commit.ID.String() == ref.ID {
394-				tags = append(tags, ref)
395+		logs := []*CommitData{}
396+		for i, commit := range commits {
397+			if i == 0 {
398+				output.LastCommit = commit
399+			}
400+
401+			tags := []*RefInfo{}
402+			for _, ref := range refs {
403+				if commit.ID.String() == ref.ID {
404+					tags = append(tags, ref)
405+				}
406 			}
407+
408+			parentSha, _ := commit.ParentID(0)
409+			parentID := ""
410+			if parentSha == nil {
411+				parentID = commit.ID.String()
412+			} else {
413+				parentID = parentSha.String()
414+			}
415+			logs = append(logs, &CommitData{
416+				ParentID:   parentID,
417+				URL:        getCommitURL(commit.ID.String()),
418+				ShortID:    getShortID(commit.ID.String()),
419+				SummaryStr: commit.Summary(),
420+				AuthorStr:  commit.Author.Name,
421+				WhenStr:    commit.Author.When.Format("02 Jan 06"),
422+				Commit:     commit,
423+				Refs:       tags,
424+			})
425 		}
426 
427-		logs = append(logs, &CommitData{
428-			URL:        getCommitURL(commit.ID.String()),
429-			ShortID:    getShortID(commit.ID.String()),
430-			SummaryStr: commit.Summary(),
431-			AuthorStr:  commit.Author.Name,
432-			WhenStr:    commit.Author.When.Format("02 Jan 06"),
433-			Commit:     commit,
434-			Refs:       tags,
435-		})
436-	}
437+		c.writeLog(pageData, logs)
438+
439+		for _, cm := range logs {
440+			wg.Add(1)
441+			go func(commit *CommitData) {
442+				defer wg.Done()
443+				c.writeLogDiff(repo, pageData, commit)
444+			}(cm)
445+		}
446+	}()
447 
448 	tree, err := repo.LsTree(pageData.RevData.ID())
449 	bail(err)
450 
451-	entries := []*TreeItem{}
452-	treeEntries := walkTree(tree, pageData.RevData, "", entries)
453-	for _, entry := range treeEntries {
454-		entry.Path = strings.TrimPrefix(entry.Path, "/")
455+	treeEntries := []*TreeItem{}
456+	readme := ""
457+	entries := make(chan *TreeItem)
458+	tw := &TreeWalker{
459+		revData:  pageData.RevData,
460+		treeItem: entries,
461+	}
462+	wg.Add(1)
463+	go func() {
464+		defer wg.Done()
465+		tw.walk(tree, "")
466+	}()
467 
468-		var lastCommits []*git.Commit
469-		// `git rev-list` is pretty expensive here, so we have a flag to disable
470-		if pageData.Repo.HideTreeLastCommit {
471-			c.Logger.Info("skipping the process of finding the last commit for each file")
472-		} else {
473-			lastCommits, err = repo.RevList([]string{pageData.RevData.ID()}, git.RevListOptions{
474-				Path:           entry.Path,
475-				CommandOptions: git.CommandOptions{Args: []string{"-1"}},
476-			})
477-			bail(err)
478+	for e := range entries {
479+		wg.Add(1)
480+		go func(entry *TreeItem) {
481+			defer wg.Done()
482+			entry.Path = strings.TrimPrefix(entry.Path, "/")
483+
484+			var lastCommits []*git.Commit
485+			// `git rev-list` is pretty expensive here, so we have a flag to disable
486+			if pageData.Repo.HideTreeLastCommit {
487+				// c.Logger.Info("skipping the process of finding the last commit for each file")
488+			} else {
489+				lastCommits, err = repo.RevList([]string{pageData.RevData.ID()}, git.RevListOptions{
490+					Path:           entry.Path,
491+					CommandOptions: git.CommandOptions{Args: []string{"-1"}},
492+				})
493+				bail(err)
494+
495+				var lc *git.Commit
496+				if len(lastCommits) > 0 {
497+					lc = lastCommits[0]
498+				}
499+				entry.CommitURL = getCommitURL(lc.ID.String())
500+				entry.Summary = lc.Summary()
501+				entry.When = lc.Author.When.Format("02 Jan 06")
502+			}
503+
504+			fpath := getFileURL(
505+				pageData.RevData,
506+				fmt.Sprintf("%s.html", entry.Path),
507+			)
508+			entry.URL = template.URL(fpath)
509 
510-			var lc *git.Commit
511-			if len(lastCommits) > 0 {
512-				lc = lastCommits[0]
513+			readmeStr := c.writeHTMLTreeFile(pageData, entry)
514+			if readmeStr != "" {
515+				readme = readmeStr
516 			}
517-			entry.CommitURL = getCommitURL(lc.ID.String())
518-			entry.Summary = lc.Summary()
519-			entry.When = lc.Author.When.Format("02 Jan 06")
520-		}
521-		fpath := getFileURL(
522-			pageData.RevData,
523-			fmt.Sprintf("%s.html", entry.Path),
524-		)
525-		entry.URL = template.URL(fpath)
526+			treeEntries = append(treeEntries, entry)
527+		}(e)
528 	}
529 
530+	wg.Wait()
531+
532 	c.Logger.Infof(
533 		"compilation complete (%s) branch (%s)",
534 		c.RepoName,
535 		pageData.RevData.Name(),
536 	)
537 
538-	go func() {
539-		c.writeLog(pageData, logs)
540-	}()
541-	go func() {
542-		c.writeLogDiffs(repo, pageData, logs)
543-	}()
544-	go func() {
545-		c.writeTree(pageData, treeEntries)
546-	}()
547+	c.writeTree(pageData, treeEntries)
548 
549-	readme := c.writeHTMLTreeFiles(pageData, treeEntries)
550 	output.Readme = readme
551 	return output
552 }