gopdf/htmlbasic.go

/*
 * Copyright (c) 2014 Kurt Jung (Gmail: kurt.w.jung)
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

package gofpdf

import (
	"regexp"
	"strings"
)

// HTMLBasicSegmentType is one element of the list produced by
// HTMLBasicTokenize: either a run of literal text in which the current
// attributes do not vary, or an open tag, or a close tag.
//
// Cat selects which of the three the segment is. For text segments Str holds
// the text itself; for tags it holds the lower-cased tag name without the
// surrounding angle brackets. Attr is a non-nil (possibly empty) map for open
// tags and nil for text and close-tag segments.
type HTMLBasicSegmentType struct {
	Cat  byte              // 'O' open tag, 'C' close tag, 'T' text
	Str  string            // Literal text unchanged, tags are lower case
	Attr map[string]string // Attribute keys are lower case
}

// Regular expressions used by HTMLBasicTokenize. They are compiled once at
// package initialization instead of on every call.
//
// htmlBasicTagRe matches a single tag: everything from a "<" up to the
// nearest following ">".
//
// htmlBasicAttrRe matches one name=value attribute inside a tag. Group 1 is
// the attribute name; the value is captured by exactly one of group 2
// (double-quoted), group 3 (single-quoted) or group 4 (unquoted, or introduced
// by a quote that is never closed). Empty values do not match.
var (
	htmlBasicTagRe  = regexp.MustCompile(`(?U)<.*>`)
	htmlBasicAttrRe = regexp.MustCompile(`([^\s"'=]+)\s*=\s*(?:"([^"]+)"|'([^']+)'|["']?([^\s"']+))`)
)

// HTMLBasicTokenize splits htmlStr into a list of text, open-tag and close-tag
// segments, in document order. This is done with regular expressions, so the
// result is only marginally better than useless; in particular a ">" inside a
// quoted attribute value ends the tag early.
//
// Line feeds are replaced with spaces and carriage returns are removed before
// the string is examined. Text segments keep their content unchanged
// otherwise. Tag names and attribute keys are converted to lower case;
// attribute values are kept as written, without their quotes, and may contain
// spaces when quoted. A "/" directly after the tag name (as in "<br/>") is not
// part of the name. Open-tag segments always carry a non-nil attribute map;
// text and close-tag segments carry a nil one.
//
// If htmlStr contains no tag at all, the result is a single text segment
// holding the whole (possibly empty) string.
func HTMLBasicTokenize(htmlStr string) (list []HTMLBasicSegmentType) {
	// This routine is adapted from http://www.fpdf.org/
	list = make([]HTMLBasicSegmentType, 0, 16)
	htmlStr = strings.Replace(htmlStr, "\n", " ", -1)
	htmlStr = strings.Replace(htmlStr, "\r", "", -1)
	tagList := htmlBasicTagRe.FindAllStringIndex(htmlStr, -1)
	if tagList == nil {
		return append(list, HTMLBasicSegmentType{Cat: 'T', Str: htmlStr})
	}
	pos := 0 // offset of the first byte not yet emitted
	for _, tag := range tagList {
		if pos < tag[0] {
			// Literal text between the previous tag and this one.
			list = append(list, HTMLBasicSegmentType{Cat: 'T', Str: htmlStr[pos:tag[0]]})
		}
		pos = tag[1]
		body := htmlStr[tag[0]+1 : tag[1]-1] // tag content without "<" and ">"
		if strings.HasPrefix(body, "/") {
			name := strings.TrimRight(body[1:], " \t")
			list = append(list, HTMLBasicSegmentType{Cat: 'C', Str: strings.ToLower(name)})
			continue
		}
		// The tag name runs up to the first blank; leading blanks are
		// deliberately not skipped so that stray text such as "a < b > c"
		// is not mistaken for a <b> tag.
		name, rest := body, ""
		if i := strings.IndexAny(body, " \t"); i >= 0 {
			name, rest = body[:i], body[i:]
		}
		// Only the name is stripped of a self-closing slash; stripping it
		// from the whole tag would damage unquoted values like href=http://x/.
		name = strings.TrimSuffix(name, "/")
		attrs := make(map[string]string)
		// The attributes are matched against the whole remainder rather than
		// against space-separated pieces, so quoted values may contain spaces.
		for _, m := range htmlBasicAttrRe.FindAllStringSubmatch(rest, -1) {
			// Exactly one of the three value groups is non-empty.
			attrs[strings.ToLower(m[1])] = m[2] + m[3] + m[4]
		}
		list = append(list, HTMLBasicSegmentType{Cat: 'O', Str: strings.ToLower(name), Attr: attrs})
	}
	if len(htmlStr) > pos {
		// Literal text after the last tag.
		list = append(list, HTMLBasicSegmentType{Cat: 'T', Str: htmlStr[pos:]})
	}
	return list
}

// HTMLBasicType is used for rendering a very basic subset of HTML. Its Write
// method handles hyperlinks, the bold, italic and underscore attributes, line
// breaks and center, right and left alignment. In the Link structure, the
// ClrR, ClrG and ClrB fields (0 through 255) define the color of hyperlinks.
// The Bold, Italic and Underscore values define the hyperlink style. Use
// HTMLBasicNew to obtain an instance that is bound to a PDF document.
type HTMLBasicType struct {
	pdf  *Fpdf // document that Write renders into; set by HTMLBasicNew
	Link struct {
		ClrR, ClrG, ClrB         int  // hyperlink text color components
		Bold, Italic, Underscore bool // styles added while a hyperlink is written
	}
}

// HTMLBasicNew creates an HTML writer bound to the PDF document f. The
// returned value renders hyperlinks in dark blue (0, 0, 128), underscored and
// neither bold nor italic; adjust its Link field to change that.
func (f *Fpdf) HTMLBasicNew() (html HTMLBasicType) {
	html = HTMLBasicType{pdf: f}
	// Every other Link field keeps its zero value: no red or green
	// component, not bold, not italic.
	html.Link.ClrB = 128
	html.Link.Underscore = true
	return html
}

// Write prints text from the current position using the currently selected
// font. See HTMLBasicNew() to create a receiver that is associated with the
// PDF document instance. The text can be encoded with a basic subset of HTML:
//
//   - <b>, <i> and <u> switch bold, italic and underscore on until the
//     matching close tag. The tags may be nested.
//   - <br> starts a new line.
//   - <center>, <right> and <left> start a new line and select the alignment
//     of the text that follows. </center> and </right> start another new
//     line and switch back to left alignment.
//   - <a href="..."> renders the text segment that immediately follows it as
//     a hyperlink, using the color and style of the Link field. An anchor
//     that is closed before any text appears links nothing.
//
// Any other tag is ignored. When the right margin is reached a line break
// occurs and text continues from the left margin. Upon method exit, the
// current position is left at the end of the text.
//
// lineHt indicates the line height in the unit of measure specified in New().
func (html *HTMLBasicType) Write(lineHt float64, htmlStr string) {
	// The style levels count how many enclosing tags (or link styles) ask for
	// each attribute; an attribute is active while its level is positive.
	var boldLvl, italicLvl, underscoreLvl, linkBold, linkItalic, linkUnderscore int
	// Remember the text color so that it can be restored after each link.
	var textR, textG, textB = html.pdf.GetTextColor()
	// Target of an open anchor that has not produced a link yet.
	var hrefStr string
	if html.Link.Bold {
		linkBold = 1
	}
	if html.Link.Italic {
		linkItalic = 1
	}
	if html.Link.Underscore {
		linkUnderscore = 1
	}
	// setStyle adjusts the three style levels and selects the font style that
	// results from them.
	setStyle := func(boldAdj, italicAdj, underscoreAdj int) {
		styleStr := ""
		boldLvl += boldAdj
		if boldLvl > 0 {
			styleStr += "B"
		}
		italicLvl += italicAdj
		if italicLvl > 0 {
			styleStr += "I"
		}
		underscoreLvl += underscoreAdj
		if underscoreLvl > 0 {
			styleStr += "U"
		}
		html.pdf.SetFont("", styleStr, 0)
	}
	// putLink writes txtStr as a hyperlink to urlStr in the link color and
	// style, then restores the previous style and text color.
	putLink := func(urlStr, txtStr string) {
		html.pdf.SetTextColor(html.Link.ClrR, html.Link.ClrG, html.Link.ClrB)
		setStyle(linkBold, linkItalic, linkUnderscore)
		html.pdf.WriteLinkString(lineHt, txtStr, urlStr)
		setStyle(-linkBold, -linkItalic, -linkUnderscore)
		html.pdf.SetTextColor(textR, textG, textB)
	}
	alignStr := "L"
	for _, el := range HTMLBasicTokenize(htmlStr) {
		switch el.Cat {
		case 'T':
			switch {
			case hrefStr != "":
				putLink(hrefStr, el.Str)
				hrefStr = ""
			case alignStr == "C" || alignStr == "R":
				html.pdf.WriteAligned(0, lineHt, el.Str, alignStr)
			default:
				html.pdf.Write(lineHt, el.Str)
			}
		case 'O':
			switch el.Str {
			case "b":
				setStyle(1, 0, 0)
			case "i":
				setStyle(0, 1, 0)
			case "u":
				setStyle(0, 0, 1)
			case "br":
				html.pdf.Ln(lineHt)
			case "center":
				html.pdf.Ln(lineHt)
				alignStr = "C"
			case "right":
				html.pdf.Ln(lineHt)
				alignStr = "R"
			case "left":
				html.pdf.Ln(lineHt)
				alignStr = "L"
			case "a":
				// A missing href yields the empty string, i.e. no link.
				hrefStr = el.Attr["href"]
			}
		case 'C':
			switch el.Str {
			case "b":
				setStyle(-1, 0, 0)
			case "i":
				setStyle(0, -1, 0)
			case "u":
				setStyle(0, 0, -1)
			case "center":
				html.pdf.Ln(lineHt)
				alignStr = "L"
			case "right":
				html.pdf.Ln(lineHt)
				alignStr = "L"
			case "a":
				// Drop a target that was never used, so that an empty anchor
				// does not turn the text after it into a link.
				hrefStr = ""
			}
		}
	}
}