diff options
Diffstat (limited to 'six/feed_parser.go')
-rw-r--r-- | six/feed_parser.go | 159 |
1 files changed, 159 insertions, 0 deletions
diff --git a/six/feed_parser.go b/six/feed_parser.go new file mode 100644 index 0000000..fb839f7 --- /dev/null +++ b/six/feed_parser.go | |||
@@ -0,0 +1,159 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0-only | ||
2 | // Copyright (C) 2020 Michael Crute <mike@crute.us>. All rights reserved. | ||
3 | // | ||
4 | // Use of this source code is governed by a license that can be found in the | ||
5 | // LICENSE file. | ||
6 | |||
7 | package six | ||
8 | |||
9 | import ( | ||
10 | "encoding/csv" | ||
11 | "fmt" | ||
12 | "io" | ||
13 | "net/http" | ||
14 | "os" | ||
15 | ) | ||
16 | |||
17 | const sixFeedUrl = "https://www.seattleix.net/autogen/participants.csv" | ||
18 | |||
19 | // Error indicating that a line in the SIX CSV feed could not be parsed. | ||
20 | // Contains the actual error as well as the line number on which the error | ||
21 | // occurred. | ||
22 | type SIXParticipantParseError struct { | ||
23 | Line int | ||
24 | Err error | ||
25 | } | ||
26 | |||
27 | // Return the error string for the error | ||
28 | func (e *SIXParticipantParseError) Error() string { | ||
29 | return fmt.Sprintf("record on line %d: %s", e.Line, e.Err) | ||
30 | } | ||
31 | |||
32 | // Returns the wrapped error | ||
33 | func (e *SIXParticipantParseError) Unwrap() error { | ||
34 | return e.Err | ||
35 | } | ||
36 | |||
37 | // Parser for the SIX participant CSV feed. | ||
38 | type SIXParser struct { | ||
39 | Records []*SIXParticipant | ||
40 | Errors []error | ||
41 | asnIndex map[int][]*SIXParticipant | ||
42 | headers []string | ||
43 | } | ||
44 | |||
45 | // Builds a new SIXParser | ||
46 | func NewSIXParser() *SIXParser { | ||
47 | return &SIXParser{ | ||
48 | Records: []*SIXParticipant{}, | ||
49 | Errors: []error{}, | ||
50 | asnIndex: map[int][]*SIXParticipant{}, | ||
51 | headers: nil, | ||
52 | } | ||
53 | } | ||
54 | |||
55 | // After parsing the feed will return a list of participant structs for the | ||
56 | // passed ASN as well as a boolean indicating if the participant exists. | ||
57 | // Participants may have several connections to the exchange and each | ||
58 | // connection is a different participant. | ||
59 | func (p *SIXParser) GetParticipantByASN(asn int) ([]*SIXParticipant, bool) { | ||
60 | r, ok := p.asnIndex[asn] | ||
61 | return r, ok | ||
62 | } | ||
63 | |||
64 | // Adds a participant to the SIXParticipant struct | ||
65 | func (p *SIXParser) AddParticipant(sp *SIXParticipant) { | ||
66 | p.Records = append(p.Records, sp) | ||
67 | |||
68 | if _, ok := p.asnIndex[sp.ASN]; !ok { | ||
69 | p.asnIndex[sp.ASN] = []*SIXParticipant{sp} | ||
70 | } else { | ||
71 | p.asnIndex[sp.ASN] = append(p.asnIndex[sp.ASN], sp) | ||
72 | } | ||
73 | } | ||
74 | |||
75 | func (p *SIXParser) sliceToHeaderMap(s []string) map[string]string { | ||
76 | r := map[string]string{} | ||
77 | for i, k := range p.headers { | ||
78 | r[k] = s[i] | ||
79 | } | ||
80 | return r | ||
81 | } | ||
82 | |||
83 | func (p *SIXParser) addRow(rn int, r []string) { | ||
84 | defer func() { | ||
85 | if e := recover(); e != nil { | ||
86 | p.Errors = append(p.Errors, &SIXParticipantParseError{ | ||
87 | Line: rn, | ||
88 | Err: e.(error), | ||
89 | }) | ||
90 | } | ||
91 | }() | ||
92 | p.AddParticipant(NewSIXParticipantFromData(p.sliceToHeaderMap(r))) | ||
93 | } | ||
94 | |||
95 | // ASSUMPTION: The correct row size will not change | ||
96 | // | ||
97 | // Participants that aren't using the route servers tend to have inconsistent | ||
98 | // row data. The head and tail of the row are fine and contain the organization | ||
99 | // and connection information but the middle area that contains route server | ||
100 | // stats generally has 7-9 too few columns. This function will graft the head | ||
101 | // and tail onto an appropriately sized row. | ||
102 | func (p *SIXParser) fixupRow(r []string) []string { | ||
103 | k := make([]string, 49) | ||
104 | copy(k[:18], r[:18]) | ||
105 | copy(k[42:], r[len(r)-7:]) | ||
106 | return k | ||
107 | } | ||
108 | |||
109 | // Parse an io.Reader containing SIX participant data in CSV format and collect | ||
110 | // the results into the parser for later querying. | ||
111 | func (p *SIXParser) Parse(fr io.Reader) { | ||
112 | rn := 0 | ||
113 | cr := csv.NewReader(fr) | ||
114 | for { | ||
115 | rn++ | ||
116 | row, err := cr.Read() | ||
117 | if err == io.EOF { | ||
118 | return | ||
119 | } | ||
120 | if err != nil { | ||
121 | row = p.fixupRow(row) | ||
122 | p.Errors = append(p.Errors, err) | ||
123 | } | ||
124 | if p.headers == nil { | ||
125 | p.headers = row | ||
126 | continue | ||
127 | } | ||
128 | p.addRow(rn, row) | ||
129 | } | ||
130 | } | ||
131 | |||
132 | // Create a new SIXParser and parse the CSV file pointed to by the filename. | ||
133 | func ParseSIXCSV(filename string) (*SIXParser, error) { | ||
134 | fp, err := os.Open(filename) | ||
135 | if err != nil { | ||
136 | return nil, err | ||
137 | } | ||
138 | defer fp.Close() | ||
139 | |||
140 | sp := NewSIXParser() | ||
141 | sp.Parse(fp) | ||
142 | |||
143 | return sp, nil | ||
144 | } | ||
145 | |||
146 | // Create a new SIXParser and parse the contents of the SIX participant file | ||
147 | // locate on the SIX http server. | ||
148 | func FetchParseSIXCSV() (*SIXParser, error) { | ||
149 | res, err := http.Get(sixFeedUrl) | ||
150 | if err != nil { | ||
151 | return nil, err | ||
152 | } | ||
153 | defer res.Body.Close() | ||
154 | |||
155 | sp := NewSIXParser() | ||
156 | sp.Parse(res.Body) | ||
157 | |||
158 | return sp, nil | ||
159 | } | ||