[tor-commits] [snowflake/main] Use tpo geoip library
meskio at torproject.org
meskio at torproject.org
Mon Oct 4 10:26:43 UTC 2021
commit 4396d505a3b872fda43ca6cf43264d0f25cd8e9f
Author: meskio <meskio at torproject.org>
Date: Thu Sep 30 12:10:59 2021 +0200
Use tpo geoip library
Now the geoip implementation has been moved to its own library to be
shared between projects.
---
broker/geoip.go | 240 ----------------------------------------
broker/metrics.go | 38 ++-----
broker/snowflake-broker_test.go | 96 +---------------
go.mod | 1 +
go.sum | 2 +
5 files changed, 13 insertions(+), 364 deletions(-)
diff --git a/broker/geoip.go b/broker/geoip.go
deleted file mode 100644
index 708cdad..0000000
--- a/broker/geoip.go
+++ /dev/null
@@ -1,240 +0,0 @@
-/*
-This code is for loading database data that maps ip addresses to countries
-for collecting and presenting statistics on snowflake use that might alert us
-to censorship events.
-
-The functions here are heavily based off of how tor maintains and searches their
-geoip database
-
-The tables used for geoip data must be structured as follows:
-
-Recognized line format for IPv4 is:
- INTIPLOW,INTIPHIGH,CC
- where INTIPLOW and INTIPHIGH are IPv4 addresses encoded as big-endian 4-byte unsigned
- integers, and CC is a country code.
-
-Note that the IPv4 line format
- "INTIPLOW","INTIPHIGH","CC","CC3","COUNTRY NAME"
-is not currently supported.
-
-Recognized line format for IPv6 is:
- IPV6LOW,IPV6HIGH,CC
- where IPV6LOW and IPV6HIGH are IPv6 addresses and CC is a country code.
-
-It also recognizes, and skips over, blank lines and lines that start
-with '#' (comments).
-
-*/
-package main
-
-import (
- "bufio"
- "bytes"
- "crypto/sha1"
- "encoding/hex"
- "fmt"
- "io"
- "log"
- "net"
- "os"
- "sort"
- "strconv"
- "strings"
- "sync"
-)
-
-type GeoIPTable interface {
- parseEntry(string) (*GeoIPEntry, error)
- Len() int
- Append(GeoIPEntry)
- ElementAt(int) GeoIPEntry
- Lock()
- Unlock()
-}
-
-type GeoIPEntry struct {
- ipLow net.IP
- ipHigh net.IP
- country string
-}
-
-type GeoIPv4Table struct {
- table []GeoIPEntry
-
- lock sync.Mutex // synchronization for geoip table accesses and reloads
-}
-
-type GeoIPv6Table struct {
- table []GeoIPEntry
-
- lock sync.Mutex // synchronization for geoip table accesses and reloads
-}
-
-func (table *GeoIPv4Table) Len() int { return len(table.table) }
-func (table *GeoIPv6Table) Len() int { return len(table.table) }
-
-func (table *GeoIPv4Table) Append(entry GeoIPEntry) {
- (*table).table = append(table.table, entry)
-}
-func (table *GeoIPv6Table) Append(entry GeoIPEntry) {
- (*table).table = append(table.table, entry)
-}
-
-func (table *GeoIPv4Table) ElementAt(i int) GeoIPEntry { return table.table[i] }
-func (table *GeoIPv6Table) ElementAt(i int) GeoIPEntry { return table.table[i] }
-
-func (table *GeoIPv4Table) Lock() { (*table).lock.Lock() }
-func (table *GeoIPv6Table) Lock() { (*table).lock.Lock() }
-
-func (table *GeoIPv4Table) Unlock() { (*table).lock.Unlock() }
-func (table *GeoIPv6Table) Unlock() { (*table).lock.Unlock() }
-
-// Convert a geoip IP address represented as a big-endian unsigned integer to net.IP
-func geoipStringToIP(ipStr string) (net.IP, error) {
- ip, err := strconv.ParseUint(ipStr, 10, 32)
- if err != nil {
- return net.IPv4(0, 0, 0, 0), fmt.Errorf("error parsing IP %s", ipStr)
- }
- var bytes [4]byte
- bytes[0] = byte(ip & 0xFF)
- bytes[1] = byte((ip >> 8) & 0xFF)
- bytes[2] = byte((ip >> 16) & 0xFF)
- bytes[3] = byte((ip >> 24) & 0xFF)
-
- return net.IPv4(bytes[3], bytes[2], bytes[1], bytes[0]), nil
-}
-
-//Parses a line in the provided geoip file that corresponds
-//to an address range and a two character country code
-func (table *GeoIPv4Table) parseEntry(candidate string) (*GeoIPEntry, error) {
-
- if candidate[0] == '#' {
- return nil, nil
- }
-
- parsedCandidate := strings.Split(candidate, ",")
-
- if len(parsedCandidate) != 3 {
- return nil, fmt.Errorf("provided geoip file is incorrectly formatted. Could not parse line:\n%s", parsedCandidate)
- }
-
- low, err := geoipStringToIP(parsedCandidate[0])
- if err != nil {
- return nil, err
- }
- high, err := geoipStringToIP(parsedCandidate[1])
- if err != nil {
- return nil, err
- }
-
- geoipEntry := &GeoIPEntry{
- ipLow: low,
- ipHigh: high,
- country: parsedCandidate[2],
- }
-
- return geoipEntry, nil
-}
-
-//Parses a line in the provided geoip file that corresponds
-//to an address range and a two character country code
-func (table *GeoIPv6Table) parseEntry(candidate string) (*GeoIPEntry, error) {
-
- if candidate[0] == '#' {
- return nil, nil
- }
-
- parsedCandidate := strings.Split(candidate, ",")
-
- if len(parsedCandidate) != 3 {
- return nil, fmt.Errorf("")
- }
-
- low := net.ParseIP(parsedCandidate[0])
- if low == nil {
- return nil, fmt.Errorf("")
- }
- high := net.ParseIP(parsedCandidate[1])
- if high == nil {
- return nil, fmt.Errorf("")
- }
-
- geoipEntry := &GeoIPEntry{
- ipLow: low,
- ipHigh: high,
- country: parsedCandidate[2],
- }
-
- return geoipEntry, nil
-}
-
-//Loads provided geoip file into our tables
-//Entries are stored in a table
-func GeoIPLoadFile(table GeoIPTable, pathname string) error {
- //open file
- geoipFile, err := os.Open(pathname)
- if err != nil {
- return err
- }
- defer geoipFile.Close()
-
- hash := sha1.New()
-
- table.Lock()
- defer table.Unlock()
-
- hashedFile := io.TeeReader(geoipFile, hash)
-
- //read in strings and call parse function
- scanner := bufio.NewScanner(hashedFile)
- for scanner.Scan() {
- entry, err := table.parseEntry(scanner.Text())
- if err != nil {
- return fmt.Errorf("provided geoip file is incorrectly formatted. Line is: %+q", scanner.Text())
- }
-
- if entry != nil {
- table.Append(*entry)
- }
-
- }
- if err := scanner.Err(); err != nil {
- return err
- }
-
- sha1Hash := hex.EncodeToString(hash.Sum(nil))
- log.Println("Using geoip file ", pathname, " with checksum", sha1Hash)
- log.Println("Loaded ", table.Len(), " entries into table")
-
- return nil
-}
-
-//Returns the country location of an IPv4 or IPv6 address, and a boolean value
-//that indicates whether the IP address was present in the geoip database
-func GetCountryByAddr(table GeoIPTable, ip net.IP) (string, bool) {
-
- table.Lock()
- defer table.Unlock()
-
- //look IP up in database
- index := sort.Search(table.Len(), func(i int) bool {
- entry := table.ElementAt(i)
- return (bytes.Compare(ip.To16(), entry.ipHigh.To16()) <= 0)
- })
-
- if index == table.Len() {
- return "", false
- }
-
- // check to see if addr is in the range specified by the returned index
- // search on IPs in invalid ranges (e.g., 127.0.0.0/8) will return the
- //country code of the next highest range
- entry := table.ElementAt(index)
- if !(bytes.Compare(ip.To16(), entry.ipLow.To16()) >= 0 &&
- bytes.Compare(ip.To16(), entry.ipHigh.To16()) <= 0) {
- return "", false
- }
-
- return table.ElementAt(index).country, true
-
-}
diff --git a/broker/metrics.go b/broker/metrics.go
index e8a6b0c..8229e0f 100644
--- a/broker/metrics.go
+++ b/broker/metrics.go
@@ -15,6 +15,7 @@ import (
"time"
"github.com/prometheus/client_golang/prometheus"
+ "gitlab.torproject.org/tpo/anti-censorship/geoip"
)
const (
@@ -38,8 +39,7 @@ type CountryStats struct {
// Implements Observable
type Metrics struct {
logger *log.Logger
- tablev4 *GeoIPv4Table
- tablev6 *GeoIPv6Table
+ geoipdb *geoip.Geoip
countryStats CountryStats
clientRoundtripEstimate time.Duration
@@ -115,19 +115,10 @@ func (m *Metrics) UpdateCountryStats(addr string, proxyType string, natType stri
}
ip := net.ParseIP(addr)
- if ip.To4() != nil {
- //This is an IPv4 address
- if m.tablev4 == nil {
- return
- }
- country, ok = GetCountryByAddr(m.tablev4, ip)
- } else {
- if m.tablev6 == nil {
- return
- }
- country, ok = GetCountryByAddr(m.tablev6, ip)
+ if m.geoipdb == nil {
+ return
}
-
+ country, ok = m.geoipdb.GetCountryByAddr(ip)
if !ok {
country = "??"
}
@@ -164,23 +155,10 @@ func (m *Metrics) UpdateCountryStats(addr string, proxyType string, natType stri
func (m *Metrics) LoadGeoipDatabases(geoipDB string, geoip6DB string) error {
// Load geoip databases
+ var err error
log.Println("Loading geoip databases")
- tablev4 := new(GeoIPv4Table)
- err := GeoIPLoadFile(tablev4, geoipDB)
- if err != nil {
- m.tablev4 = nil
- return err
- }
- m.tablev4 = tablev4
-
- tablev6 := new(GeoIPv6Table)
- err = GeoIPLoadFile(tablev6, geoip6DB)
- if err != nil {
- m.tablev6 = nil
- return err
- }
- m.tablev6 = tablev6
- return nil
+ m.geoipdb, err = geoip.New(geoipDB, geoip6DB)
+ return err
}
func NewMetrics(metricsLogger *log.Logger) (*Metrics, error) {
diff --git a/broker/snowflake-broker_test.go b/broker/snowflake-broker_test.go
index 233cfea..25a947c 100644
--- a/broker/snowflake-broker_test.go
+++ b/broker/snowflake-broker_test.go
@@ -6,7 +6,6 @@ import (
"io"
"io/ioutil"
"log"
- "net"
"net/http"
"net/http/httptest"
"os"
@@ -473,106 +472,15 @@ func TestSnowflakeHeap(t *testing.T) {
})
}
-func TestGeoip(t *testing.T) {
+func TestInvalidGeoipFile(t *testing.T) {
Convey("Geoip", t, func() {
- tv4 := new(GeoIPv4Table)
- err := GeoIPLoadFile(tv4, "test_geoip")
- So(err, ShouldEqual, nil)
- tv6 := new(GeoIPv6Table)
- err = GeoIPLoadFile(tv6, "test_geoip6")
- So(err, ShouldEqual, nil)
-
- Convey("IPv4 Country Mapping Tests", func() {
- for _, test := range []struct {
- addr, cc string
- ok bool
- }{
- {
- "129.97.208.23", //uwaterloo
- "CA",
- true,
- },
- {
- "127.0.0.1",
- "",
- false,
- },
- {
- "255.255.255.255",
- "",
- false,
- },
- {
- "0.0.0.0",
- "",
- false,
- },
- {
- "223.252.127.255", //test high end of range
- "JP",
- true,
- },
- {
- "223.252.127.255", //test low end of range
- "JP",
- true,
- },
- } {
- country, ok := GetCountryByAddr(tv4, net.ParseIP(test.addr))
- So(country, ShouldEqual, test.cc)
- So(ok, ShouldResemble, test.ok)
- }
- })
-
- Convey("IPv6 Country Mapping Tests", func() {
- for _, test := range []struct {
- addr, cc string
- ok bool
- }{
- {
- "2620:101:f000:0:250:56ff:fe80:168e", //uwaterloo
- "CA",
- true,
- },
- {
- "fd00:0:0:0:0:0:0:1",
- "",
- false,
- },
- {
- "0:0:0:0:0:0:0:0",
- "",
- false,
- },
- {
- "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff",
- "",
- false,
- },
- {
- "2a07:2e47:ffff:ffff:ffff:ffff:ffff:ffff", //test high end of range
- "FR",
- true,
- },
- {
- "2a07:2e40::", //test low end of range
- "FR",
- true,
- },
- } {
- country, ok := GetCountryByAddr(tv6, net.ParseIP(test.addr))
- So(country, ShouldEqual, test.cc)
- So(ok, ShouldResemble, test.ok)
- }
- })
-
// Make sure things behave properly if geoip file fails to load
ctx := NewBrokerContext(NullLogger())
if err := ctx.metrics.LoadGeoipDatabases("invalid_filename", "invalid_filename6"); err != nil {
log.Printf("loading geo ip databases returned error: %v", err)
}
ctx.metrics.UpdateCountryStats("127.0.0.1", "", NATUnrestricted)
- So(ctx.metrics.tablev4, ShouldEqual, nil)
+ So(ctx.metrics.geoipdb, ShouldEqual, nil)
})
}
diff --git a/go.mod b/go.mod
index 36585aa..9d6b6ac 100644
--- a/go.mod
+++ b/go.mod
@@ -16,6 +16,7 @@ require (
github.com/smartystreets/goconvey v1.6.4
github.com/xtaci/kcp-go/v5 v5.6.1
github.com/xtaci/smux v1.5.15
+ gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01
golang.org/x/crypto v0.0.0-20210317152858-513c2a44f670
golang.org/x/net v0.0.0-20210316092652-d523dce5a7f4
golang.org/x/sys v0.0.0-20210317225723-c4fcb01b228e // indirect
diff --git a/go.sum b/go.sum
index f0b3927..34bc936 100644
--- a/go.sum
+++ b/go.sum
@@ -358,6 +358,8 @@ github.com/xtaci/smux v1.5.15 h1:6hMiXswcleXj5oNfcJc+DXS8Vj36XX2LaX98udog6Kc=
github.com/xtaci/smux v1.5.15/go.mod h1:OMlQbT5vcgl2gb49mFkYo6SMf+zP3rcjcwQz7ZU7IGY=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
+gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01 h1:4949mHh9Vj2/okk48yG8nhP6TosFWOUfSfSr502sKGE=
+gitlab.torproject.org/tpo/anti-censorship/geoip v0.0.0-20210928150955-7ce4b3d98d01/go.mod h1:K3LOI4H8fa6j+7E10ViHeGEQV10304FG4j94ypmKLjY=
go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.etcd.io/etcd v0.0.0-20191023171146-3cf2f69b5738/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg=
go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk=
More information about the tor-commits
mailing list