# GOPL Chapter 5: Functions

### Overview

These are my study notes for Chapter 5 (Functions) of *The Go Programming Language* (GOPL).

### Collecting all the links on a page

The skeleton:

```golang
package main

import (
    "fmt"
    "net/http"
    "strings"

    "golang.org/x/net/html"
)

// This skeleton omits all error handling.

// visit appends to links every link found in n and returns the result.
func visit(links []string, n *html.Node) []string {
    if n.Type == html.ElementNode && n.Data == "a" {
        for _, a := range n.Attr {
            if a.Key == "href" && strings.Contains(a.Val, "http") {
                links = append(links, a.Val)
            }
        }
    }
    for c := n.FirstChild; c != nil; c = c.NextSibling {
        links = visit(links, c)
    }
    return links
}

// findLinks2 fetches and parses the root page.
func findLinks2() []string {
    resp, _ := http.Get("http://hao123.com")
    doc, _ := html.Parse(resp.Body)
    resp.Body.Close()
    return visit(nil, doc)
}

func main() {
    links := findLinks2()
    for _, link := range links {
        fmt.Println(link)
    }
}
```
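A detail worth noting: `visit` both takes and returns `links` because `append` may allocate a new backing array, so the caller must keep the returned slice. A minimal standalone sketch of that behavior:

```golang
package main

import "fmt"

func main() {
    s := make([]string, 0, 1)   // length 0, capacity 1
    t := append(s, "a")         // fits within s's spare capacity
    t = append(t, "b")          // exceeds capacity: a new backing array is allocated
    fmt.Println(len(s), len(t)) // "0 2": s itself is unchanged, only t sees both elements
}
```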

### Printing all the tags in a web page

```golang
package main

import (
    "fmt"
    "net/http"

    "golang.org/x/net/html"
)

func main() {
    url := "http://baidu.com"
    outline(url)
}

// outline fetches a page and prints its element tags as an indented
// tree. Error checks are omitted for brevity.
func outline(url string) {
    resp, _ := http.Get(url)
    defer resp.Body.Close()
    doc, _ := html.Parse(resp.Body)
    forEachNode(doc, startElement, endElement)
}

// forEachNode takes the functions pre and post as arguments.
func forEachNode(n *html.Node, pre, post func(n *html.Node)) {
    // Call pre (if non-nil) before visiting the children.
    if pre != nil {
        pre(n)
    }
    for c := n.FirstChild; c != nil; c = c.NextSibling {
        forEachNode(c, pre, post)
    }
    // Call post (if non-nil) after visiting the children.
    if post != nil {
        post(n)
    }
}

var depth int

func startElement(n *html.Node) {
    if n.Type == html.ElementNode {
        fmt.Printf("%*s<%s>\n", depth*2, "", n.Data)
        depth++
    }
}

func endElement(n *html.Node) {
    if n.Type == html.ElementNode {
        depth--
        fmt.Printf("%*s</%s>\n", depth*2, "", n.Data)
    }
}

/* Sample output:
<html>
  <head>
    <meta>
    </meta>
  </head>
  <body>
  </body>
</html>
*/
```
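`forEachNode` works because functions are first-class values in Go: `pre` and `post` are passed around like any other value. A minimal sketch of the same idea, independent of HTML:

```golang
package main

import "fmt"

// apply calls f on every element of xs; f is an ordinary value here.
func apply(xs []int, f func(int)) {
    for _, x := range xs {
        f(x)
    }
}

func main() {
    double := func(x int) { fmt.Println(2 * x) }
    apply([]int{1, 2, 3}, double) // prints 2, 4, 6
}
```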

### Topological sort

Given the prerequisites for each course, determine a valid order in which to take them:

```golang
package main

import (
    "fmt"
    "sort"
)

// Topological sort: determine an order in which to take the courses.
var prereqs = map[string][]string{
    "algorithms": {"data structures"},
    "calculus":   {"linear algebra"},

    "compilers": {
        "data structures",
        "formal languages",
        "computer organization",
    },

    "data structures":       {"discrete math"},
    "databases":             {"data structures"},
    "discrete math":         {"intro to programming"},
    "formal languages":      {"discrete math"},
    "networks":              {"operating systems"},
    "operating systems":     {"data structures", "computer organization"},
    "programming languages": {"data structures", "computer organization"},
}

func main() {
    for i, course := range topoSort(prereqs) {
        fmt.Printf("%d:\t%s\n", i+1, course)
    }
}

func topoSort(m map[string][]string) []string {
    var order []string
    var keys []string
    for key := range m {
        keys = append(keys, key)
    }
    // Sort the keys so the output is deterministic
    // (map iteration order is randomized in Go).
    sort.Strings(keys)

    seen := make(map[string]bool)
    // An anonymous recursive function must be declared before it is
    // assigned, so that the literal can refer to itself by name.
    var visitAll func(items []string)
    // Depth-first traversal: visit all prerequisites of a course
    // before appending the course itself.
    visitAll = func(items []string) {
        for _, item := range items {
            if !seen[item] {
                seen[item] = true
                visitAll(m[item])
                order = append(order, item)
            }
        }
    }
    visitAll(keys)
    return order
}
```
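The two-step pattern (`var visitAll func(...)` followed by a separate assignment) is required because a recursive function literal must already be in scope to call itself. A minimal sketch:

```golang
package main

import "fmt"

func main() {
    // Declaring the variable first lets the function literal
    // refer to itself through the variable name.
    var fib func(n int) int
    fib = func(n int) int {
        if n < 2 {
            return n
        }
        return fib(n-1) + fib(n-2)
    }
    fmt.Println(fib(10)) // 55
}
```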

### Breadth-first traversal: visiting every URL reachable from a starting URL

```golang
package main

import (
    "fmt"
    "log"
    "net/http"
    "strings"

    "golang.org/x/net/html"
)

// Extract makes an HTTP GET request to the specified URL, parses
// the response as HTML, and returns the links in the document.
func Extract(url string) ([]string, error) {
    resp, err := http.Get(url)
    if err != nil {
        return nil, err
    }
    if resp.StatusCode != http.StatusOK {
        resp.Body.Close()
        return nil, fmt.Errorf("getting %s: %s", url, resp.Status)
    }

    doc, err := html.Parse(resp.Body)
    resp.Body.Close()
    if err != nil {
        return nil, fmt.Errorf("parsing %s as HTML: %v", url, err)
    }

    var links []string
    visitNode := func(n *html.Node) {
        if n.Type == html.ElementNode && n.Data == "a" {
            for _, a := range n.Attr {
                if a.Key != "href" {
                    continue
                }
                if !strings.Contains(a.Val, "http") {
                    continue
                }
                link, err := resp.Request.URL.Parse(a.Val)
                if err != nil {
                    continue // ignore bad URLs
                }
                links = append(links, link.String())
            }
        }
    }
    forEachNode(doc, visitNode, nil)
    return links, nil
}

func forEachNode(n *html.Node, pre, post func(n *html.Node)) {
    if pre != nil {
        pre(n)
    }
    for c := n.FirstChild; c != nil; c = c.NextSibling {
        forEachNode(c, pre, post)
    }
    if post != nil {
        post(n)
    }
}

// crawl wraps Extract and prints each URL as it is visited.
func crawl(url string) []string {
    fmt.Println(url)
    list, err := Extract(url)
    if err != nil {
        log.Print(err)
    }
    return list
}

// bFS performs a breadth-first traversal: it calls f for each item in
// list and appends whatever f returns to the worklist. f is called at
// most once per item.
func bFS(f func(item string) []string, list []string) {
    visited := make(map[string]bool)
    for len(list) > 0 {
        items := list
        list = nil
        for _, item := range items {
            if !visited[item] {
                visited[item] = true
                list = append(list, f(item)...)
            }
        }
    }
}

func main() {
    var urls []string
    urls = append(urls, "http://hao123.com")
    bFS(crawl, urls)
}

```
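`bFS` is generic over its expansion function, so the same worklist pattern works on any graph, not just the web. A minimal sketch over a small in-memory graph (hypothetical data):

```golang
package main

import "fmt"

// Same breadth-first worklist pattern as above.
func bFS(f func(item string) []string, list []string) {
    visited := make(map[string]bool)
    for len(list) > 0 {
        items := list
        list = nil
        for _, item := range items {
            if !visited[item] {
                visited[item] = true
                list = append(list, f(item)...)
            }
        }
    }
}

func main() {
    graph := map[string][]string{ // hypothetical toy graph
        "a": {"b", "c"},
        "b": {"d"},
        "c": {"d"},
    }
    expand := func(item string) []string {
        fmt.Println(item)
        return graph[item]
    }
    bFS(expand, []string{"a"}) // prints a, b, c, d: each node once, level by level
}
```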
------------------------

### Deferred calls and recover

`soleTitle` uses panic and recover to bail out of a deep recursion early: finding a second non-empty title panics with a sentinel value, and a deferred function converts that expected panic back into an ordinary error.

```golang
package main

import (
    "fmt"
    "net/http"
    "os"
    "strings"

    "golang.org/x/net/html"
)

func forEachNode(n *html.Node, pre, post func(n *html.Node)) {
    if pre != nil {
        pre(n)
    }
    for c := n.FirstChild; c != nil; c = c.NextSibling {
        forEachNode(c, pre, post)
    }
    if post != nil {
        post(n)
    }
}

// soleTitle returns the text of the first non-empty title element
// in doc, and an error if there is no title or more than one.
func soleTitle(doc *html.Node) (title string, err error) {
    type bailout struct{}

    // The deferred call to recover() inspects the panic value. If it
    // is bailout{}, the panic was expected and is turned into an
    // ordinary error; any other non-nil value is an unexpected panic,
    // so we re-panic with it.
    defer func() {
        switch p := recover(); p {
        case nil:
            // no panic
        case bailout{}:
            // "expected" panic
            err = fmt.Errorf("multiple title elements")
        default:
            panic(p) // unexpected panic; carry on panicking
        }
    }()

    // Bail out of recursion if we find more than one non-empty title.
    forEachNode(doc, func(n *html.Node) {
        if n.Type == html.ElementNode && n.Data == "title" &&
            n.FirstChild != nil {
            if title != "" {
                panic(bailout{})
            }
            title = n.FirstChild.Data
        }
    }, nil)
    if title == "" {
        return "", fmt.Errorf("no title element")
    }
    return title, nil
}

func title(url string) error {
    resp, err := http.Get(url)
    if err != nil {
        return err
    }

    // Check that Content-Type is HTML (e.g., "text/html; charset=utf-8").
    ct := resp.Header.Get("Content-Type")
    if ct != "text/html" && !strings.HasPrefix(ct, "text/html;") {
        resp.Body.Close()
        return fmt.Errorf("%s has type %s, not text/html", url, ct)
    }

    doc, err := html.Parse(resp.Body)
    resp.Body.Close()
    if err != nil {
        return fmt.Errorf("parsing %s as HTML: %v", url, err)
    }
    title, err := soleTitle(doc)
    if err != nil {
        return err
    }
    fmt.Println(title)
    return nil
}

func main() {
    var urls []string
    urls = append(urls, "http://hao123.com", "http://baidu.com")

    for _, arg := range urls {
        if err := title(arg); err != nil {
            fmt.Fprintf(os.Stderr, "title: %v\n", err)
        }
    }
}
```
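The deferred function in `soleTitle` can report the error only because deferred calls run after the result values are set and may modify named results. A minimal standalone sketch of that mechanism:

```golang
package main

import "fmt"

// safeDiv converts a runtime panic (divide by zero) into an error
// by assigning to the named result from a deferred function.
func safeDiv(a, b int) (q int, err error) {
    defer func() {
        if p := recover(); p != nil {
            err = fmt.Errorf("recovered: %v", p)
        }
    }()
    return a / b, nil
}

func main() {
    fmt.Println(safeDiv(6, 3)) // 2 <nil>
    fmt.Println(safeDiv(1, 0)) // 0 recovered: runtime error: integer divide by zero
}
```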