# GOPL Chapter 5: Functions

### Overview

These are my study notes for Chapter 5 (Functions) of *The Go Programming Language* (GOPL).

### Collecting all the links on a page

The skeleton:

```golang
package main

import (
    "fmt"
    "net/http"
    "strings"

    "golang.org/x/net/html"
)

// This skeleton omits all error handling.

// visit appends to links every link found in n and returns the result.
func visit(links []string, n *html.Node) []string {
    if n.Type == html.ElementNode && n.Data == "a" {
        for _, a := range n.Attr {
            if a.Key == "href" && strings.Contains(a.Val, "http") {
                links = append(links, a.Val)
            }
        }
    }
    for c := n.FirstChild; c != nil; c = c.NextSibling {
        links = visit(links, c)
    }
    return links
}

// findLinks2 fetches and parses the root page.
func findLinks2() []string {
    resp, _ := http.Get("http://hao123.com")
    doc, _ := html.Parse(resp.Body)
    resp.Body.Close()
    return visit(nil, doc)
}

func main() {
    links := findLinks2()
    for _, link := range links {
        fmt.Println(link)
    }
}
```
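A detail worth noting: `visit` both takes and returns `links` because `append` may allocate a new backing array, so the caller must keep the returned slice. A minimal standalone sketch of that behavior:

```golang
package main

import "fmt"

func main() {
    s := make([]string, 0, 1)   // length 0, capacity 1
    t := append(s, "a")         // fits within s's spare capacity
    t = append(t, "b")          // exceeds capacity: a new backing array is allocated
    fmt.Println(len(s), len(t)) // "0 2": s itself is unchanged, only t sees both elements
}
```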

### Printing all the tags in a web page

```golang
package main

import (
    "fmt"
    "net/http"

    "golang.org/x/net/html"
)

func main() {
    url := "http://baidu.com"
    outline(url)
}

// outline fetches a page and prints its element tags as an indented
// tree. Error checks are omitted for brevity.
func outline(url string) {
    resp, _ := http.Get(url)
    defer resp.Body.Close()
    doc, _ := html.Parse(resp.Body)
    forEachNode(doc, startElement, endElement)
}

// forEachNode takes the functions pre and post as arguments.
func forEachNode(n *html.Node, pre, post func(n *html.Node)) {
    // Call pre (if non-nil) before visiting the children.
    if pre != nil {
        pre(n)
    }
    for c := n.FirstChild; c != nil; c = c.NextSibling {
        forEachNode(c, pre, post)
    }
    // Call post (if non-nil) after visiting the children.
    if post != nil {
        post(n)
    }
}

var depth int

func startElement(n *html.Node) {
    if n.Type == html.ElementNode {
        fmt.Printf("%*s<%s>\n", depth*2, "", n.Data)
        depth++
    }
}

func endElement(n *html.Node) {
    if n.Type == html.ElementNode {
        depth--
        fmt.Printf("%*s</%s>\n", depth*2, "", n.Data)
    }
}

/* Sample output:
<html>
  <head>
    <meta>
    </meta>
  </head>
  <body>
  </body>
</html>
*/
```
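`forEachNode` works because functions are first-class values in Go: `pre` and `post` are passed around like any other value. A minimal sketch of the same idea, independent of HTML:

```golang
package main

import "fmt"

// apply calls f on every element of xs; f is an ordinary value here.
func apply(xs []int, f func(int)) {
    for _, x := range xs {
        f(x)
    }
}

func main() {
    double := func(x int) { fmt.Println(2 * x) }
    apply([]int{1, 2, 3}, double) // prints 2, 4, 6
}
```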

### Topological sort

Given the prerequisites for each course, determine a valid order in which to take them:

```golang
package main

import (
    "fmt"
    "sort"
)

// Topological sort: determine an order in which to take the courses.
var prereqs = map[string][]string{
    "algorithms": {"data structures"},
    "calculus":   {"linear algebra"},

    "compilers": {
        "data structures",
        "formal languages",
        "computer organization",
    },

    "data structures":       {"discrete math"},
    "databases":             {"data structures"},
    "discrete math":         {"intro to programming"},
    "formal languages":      {"discrete math"},
    "networks":              {"operating systems"},
    "operating systems":     {"data structures", "computer organization"},
    "programming languages": {"data structures", "computer organization"},
}

func main() {
    for i, course := range topoSort(prereqs) {
        fmt.Printf("%d:\t%s\n", i+1, course)
    }
}

func topoSort(m map[string][]string) []string {
    var order []string
    var keys []string
    for key := range m {
        keys = append(keys, key)
    }
    // Sort the keys so the output is deterministic
    // (map iteration order is randomized in Go).
    sort.Strings(keys)

    seen := make(map[string]bool)
    // An anonymous recursive function must be declared before it is
    // assigned, so that the literal can refer to itself by name.
    var visitAll func(items []string)
    // Depth-first traversal: visit all prerequisites of a course
    // before appending the course itself.
    visitAll = func(items []string) {
        for _, item := range items {
            if !seen[item] {
                seen[item] = true
                visitAll(m[item])
                order = append(order, item)
            }
        }
    }
    visitAll(keys)
    return order
}
```
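The two-step pattern (`var visitAll func(...)` followed by a separate assignment) is required because a recursive function literal must already be in scope to call itself. A minimal sketch:

```golang
package main

import "fmt"

func main() {
    // Declaring the variable first lets the function literal
    // refer to itself through the variable name.
    var fib func(n int) int
    fib = func(n int) int {
        if n < 2 {
            return n
        }
        return fib(n-1) + fib(n-2)
    }
    fmt.Println(fib(10)) // 55
}
```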

### Breadth-first traversal: visiting every URL reachable from a starting URL

```golang
package main

import (
    "fmt"
    "log"
    "net/http"
    "strings"

    "golang.org/x/net/html"
)

// Extract makes an HTTP GET request to the specified URL, parses
// the response as HTML, and returns the links in the document.
func Extract(url string) ([]string, error) {
    resp, err := http.Get(url)
    if err != nil {
        return nil, err
    }
    if resp.StatusCode != http.StatusOK {
        resp.Body.Close()
        return nil, fmt.Errorf("getting %s: %s", url, resp.Status)
    }

    doc, err := html.Parse(resp.Body)
    resp.Body.Close()
    if err != nil {
        return nil, fmt.Errorf("parsing %s as HTML: %v", url, err)
    }

    var links []string
    visitNode := func(n *html.Node) {
        if n.Type == html.ElementNode && n.Data == "a" {
            for _, a := range n.Attr {
                if a.Key != "href" {
                    continue
                }
                if !strings.Contains(a.Val, "http") {
                    continue
                }
                link, err := resp.Request.URL.Parse(a.Val)
                if err != nil {
                    continue // ignore bad URLs
                }
                links = append(links, link.String())
            }
        }
    }
    forEachNode(doc, visitNode, nil)
    return links, nil
}

func forEachNode(n *html.Node, pre, post func(n *html.Node)) {
    if pre != nil {
        pre(n)
    }
    for c := n.FirstChild; c != nil; c = c.NextSibling {
        forEachNode(c, pre, post)
    }
    if post != nil {
        post(n)
    }
}

// crawl wraps Extract and prints each URL as it is visited.
func crawl(url string) []string {
    fmt.Println(url)
    list, err := Extract(url)
    if err != nil {
        log.Print(err)
    }
    return list
}

// bFS performs a breadth-first traversal: it calls f for each item in
// list and appends whatever f returns to the worklist. f is called at
// most once per item.
func bFS(f func(item string) []string, list []string) {
    visited := make(map[string]bool)
    for len(list) > 0 {
        items := list
        list = nil
        for _, item := range items {
            if !visited[item] {
                visited[item] = true
                list = append(list, f(item)...)
            }
        }
    }
}

func main() {
    var urls []string
    urls = append(urls, "http://hao123.com")
    bFS(crawl, urls)
}

```
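`bFS` is generic over its expansion function, so the same worklist pattern works on any graph, not just the web. A minimal sketch over a small in-memory graph (hypothetical data):

```golang
package main

import "fmt"

// Same breadth-first worklist pattern as above.
func bFS(f func(item string) []string, list []string) {
    visited := make(map[string]bool)
    for len(list) > 0 {
        items := list
        list = nil
        for _, item := range items {
            if !visited[item] {
                visited[item] = true
                list = append(list, f(item)...)
            }
        }
    }
}

func main() {
    graph := map[string][]string{ // hypothetical toy graph
        "a": {"b", "c"},
        "b": {"d"},
        "c": {"d"},
    }
    expand := func(item string) []string {
        fmt.Println(item)
        return graph[item]
    }
    bFS(expand, []string{"a"}) // prints a, b, c, d: each node once, level by level
}
```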
------------------------

### Deferred calls and recover

`soleTitle` uses panic and recover to bail out of a deep recursion early: finding a second non-empty title panics with a sentinel value, and a deferred function converts that expected panic back into an ordinary error.

```golang
package main

import (
    "fmt"
    "net/http"
    "os"
    "strings"

    "golang.org/x/net/html"
)

func forEachNode(n *html.Node, pre, post func(n *html.Node)) {
    if pre != nil {
        pre(n)
    }
    for c := n.FirstChild; c != nil; c = c.NextSibling {
        forEachNode(c, pre, post)
    }
    if post != nil {
        post(n)
    }
}

// soleTitle returns the text of the first non-empty title element
// in doc, and an error if there is no title or more than one.
func soleTitle(doc *html.Node) (title string, err error) {
    type bailout struct{}

    // The deferred call to recover() inspects the panic value. If it
    // is bailout{}, the panic was expected and is turned into an
    // ordinary error; any other non-nil value is an unexpected panic,
    // so we re-panic with it.
    defer func() {
        switch p := recover(); p {
        case nil:
            // no panic
        case bailout{}:
            // "expected" panic
            err = fmt.Errorf("multiple title elements")
        default:
            panic(p) // unexpected panic; carry on panicking
        }
    }()

    // Bail out of recursion if we find more than one non-empty title.
    forEachNode(doc, func(n *html.Node) {
        if n.Type == html.ElementNode && n.Data == "title" &&
            n.FirstChild != nil {
            if title != "" {
                panic(bailout{})
            }
            title = n.FirstChild.Data
        }
    }, nil)
    if title == "" {
        return "", fmt.Errorf("no title element")
    }
    return title, nil
}

func title(url string) error {
    resp, err := http.Get(url)
    if err != nil {
        return err
    }

    // Check that Content-Type is HTML (e.g., "text/html; charset=utf-8").
    ct := resp.Header.Get("Content-Type")
    if ct != "text/html" && !strings.HasPrefix(ct, "text/html;") {
        resp.Body.Close()
        return fmt.Errorf("%s has type %s, not text/html", url, ct)
    }

    doc, err := html.Parse(resp.Body)
    resp.Body.Close()
    if err != nil {
        return fmt.Errorf("parsing %s as HTML: %v", url, err)
    }
    title, err := soleTitle(doc)
    if err != nil {
        return err
    }
    fmt.Println(title)
    return nil
}

func main() {
    var urls []string
    urls = append(urls, "http://hao123.com", "http://baidu.com")

    for _, arg := range urls {
        if err := title(arg); err != nil {
            fmt.Fprintf(os.Stderr, "title: %v\n", err)
        }
    }
}
```
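The deferred function in `soleTitle` can report the error only because deferred calls run after the result values are set and may modify named results. A minimal standalone sketch of that mechanism:

```golang
package main

import "fmt"

// safeDiv converts a runtime panic (divide by zero) into an error
// by assigning to the named result from a deferred function.
func safeDiv(a, b int) (q int, err error) {
    defer func() {
        if p := recover(); p != nil {
            err = fmt.Errorf("recovered: %v", p)
        }
    }()
    return a / b, nil
}

func main() {
    fmt.Println(safeDiv(6, 3)) // 2 <nil>
    fmt.Println(safeDiv(1, 0)) // 0 recovered: runtime error: integer divide by zero
}
```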