1
0

statistic.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. package statistic
  2. import (
  3. "path/filepath"
  4. "strings"
  5. "sync"
  6. "time"
  7. "github.com/microcosm-cc/bluemonday"
  8. "imuslab.com/zoraxy/mod/database"
  9. )
  10. /*
  11. Statistic Package
  12. This package is designed to collect information
  13. and store it for future analysis
  14. */
  15. // Faststat, a interval summary for all collected data and avoid
  16. // looping through every data everytime a overview is needed
  17. type DailySummary struct {
  18. TotalRequest int64 //Total request of the day
  19. ErrorRequest int64 //Invalid request of the day, including error or not found
  20. ValidRequest int64 //Valid request of the day
  21. //Type counters
  22. ForwardTypes *sync.Map //Map that hold the forward types
  23. RequestOrigin *sync.Map //Map that hold [country ISO code]: visitor counter
  24. RequestClientIp *sync.Map //Map that hold all unique request IPs
  25. Referer *sync.Map //Map that store where the user was refered from
  26. UserAgent *sync.Map //Map that store the useragent of the request
  27. RequestURL *sync.Map //Request URL of the request object
  28. }
  29. type RequestInfo struct {
  30. IpAddr string //IP address of the downstream request
  31. RequestOriginalCountryISOCode string //ISO code of the country where the request originated
  32. Succ bool //If the request is successful and resp generated by upstream instead of Zoraxy (except static web server)
  33. StatusCode int //HTTP status code of the request
  34. ForwardType string //Forward type of the request, usually the proxy type (e.g. host-http, subdomain-websocket or vdir-http or any of the combination)
  35. Referer string //Referer of the downstream request
  36. UserAgent string //UserAgent of the downstream request
  37. RequestURL string //Request URL
  38. Target string //Target domain or hostname
  39. }
  40. type CollectorOption struct {
  41. Database *database.Database
  42. }
  43. type Collector struct {
  44. rtdataStopChan chan bool
  45. DailySummary *DailySummary
  46. Option *CollectorOption
  47. }
  48. func NewStatisticCollector(option CollectorOption) (*Collector, error) {
  49. option.Database.NewTable("stats")
  50. //Create the collector object
  51. thisCollector := Collector{
  52. DailySummary: NewDailySummary(),
  53. Option: &option,
  54. }
  55. //Load the stat if exists for today
  56. //This will exists if the program was forcefully restarted
  57. year, month, day := time.Now().Date()
  58. summary := thisCollector.LoadSummaryOfDay(year, month, day)
  59. if summary != nil {
  60. thisCollector.DailySummary = summary
  61. }
  62. //Schedule the realtime statistic clearing at midnight everyday
  63. rtstatStopChan := thisCollector.ScheduleResetRealtimeStats()
  64. thisCollector.rtdataStopChan = rtstatStopChan
  65. return &thisCollector, nil
  66. }
  67. // Write the current in-memory summary to database file
  68. func (c *Collector) SaveSummaryOfDay() {
  69. //When it is called in 0:00am, make sure it is stored as yesterday key
  70. t := time.Now().Add(-30 * time.Second)
  71. summaryKey := t.Format("2006_01_02")
  72. saveData := DailySummaryToExport(*c.DailySummary)
  73. c.Option.Database.Write("stats", summaryKey, saveData)
  74. }
  75. // Get the daily summary up until now
  76. func (c *Collector) GetCurrentDailySummary() *DailySummary {
  77. return c.DailySummary
  78. }
  79. // Load the summary of a day given
  80. func (c *Collector) LoadSummaryOfDay(year int, month time.Month, day int) *DailySummary {
  81. date := time.Date(year, time.Month(month), day, 0, 0, 0, 0, time.Local)
  82. summaryKey := date.Format("2006_01_02")
  83. targetSummaryExport := DailySummaryExport{}
  84. c.Option.Database.Read("stats", summaryKey, &targetSummaryExport)
  85. targetSummary := DailySummaryExportToSummary(targetSummaryExport)
  86. return &targetSummary
  87. }
  88. // Reset today summary, for debug or restoring injections
  89. func (c *Collector) ResetSummaryOfDay() {
  90. c.DailySummary = NewDailySummary()
  91. }
  92. // This function gives the current slot in the 288- 5 minutes interval of the day
  93. func (c *Collector) GetCurrentRealtimeStatIntervalId() int {
  94. now := time.Now()
  95. startOfDay := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.Local).Unix()
  96. secondsSinceStartOfDay := now.Unix() - startOfDay
  97. interval := secondsSinceStartOfDay / (5 * 60)
  98. return int(interval)
  99. }
  100. func (c *Collector) Close() {
  101. //Stop the ticker
  102. c.rtdataStopChan <- true
  103. //Write the buffered data into database
  104. c.SaveSummaryOfDay()
  105. }
  106. // Main function to record all the inbound traffics
  107. // Note that this function run in go routine and might have concurrent R/W issue
  108. // Please make sure there is no racing paramters in this function
  109. func (c *Collector) RecordRequest(ri RequestInfo) {
  110. go func() {
  111. c.DailySummary.TotalRequest++
  112. if ri.Succ {
  113. c.DailySummary.ValidRequest++
  114. } else {
  115. c.DailySummary.ErrorRequest++
  116. }
  117. //Store the request info into correct types of maps
  118. ft, ok := c.DailySummary.ForwardTypes.Load(ri.ForwardType)
  119. if !ok {
  120. c.DailySummary.ForwardTypes.Store(ri.ForwardType, 1)
  121. } else {
  122. c.DailySummary.ForwardTypes.Store(ri.ForwardType, ft.(int)+1)
  123. }
  124. originISO := strings.ToLower(ri.RequestOriginalCountryISOCode)
  125. fo, ok := c.DailySummary.RequestOrigin.Load(originISO)
  126. if !ok {
  127. c.DailySummary.RequestOrigin.Store(originISO, 1)
  128. } else {
  129. c.DailySummary.RequestOrigin.Store(originISO, fo.(int)+1)
  130. }
  131. //Filter out CF forwarded requests
  132. if strings.Contains(ri.IpAddr, ",") {
  133. ips := strings.Split(strings.TrimSpace(ri.IpAddr), ",")
  134. if len(ips) >= 1 && IsValidIPAddress(strings.TrimPrefix(ips[0], "[")) {
  135. //Example when forwarded from CF: 158.250.160.114,109.21.249.211
  136. //Or IPv6 [15c4:cbb4:cc98:4291:ffc1:3a46:06a1:51a7],109.21.249.211
  137. ri.IpAddr = ips[0]
  138. }
  139. }
  140. fi, ok := c.DailySummary.RequestClientIp.Load(ri.IpAddr)
  141. if !ok {
  142. c.DailySummary.RequestClientIp.Store(ri.IpAddr, 1)
  143. } else {
  144. c.DailySummary.RequestClientIp.Store(ri.IpAddr, fi.(int)+1)
  145. }
  146. //Record the referer
  147. p := bluemonday.StripTagsPolicy()
  148. filteredReferer := p.Sanitize(
  149. ri.Referer,
  150. )
  151. rf, ok := c.DailySummary.Referer.Load(filteredReferer)
  152. if !ok {
  153. c.DailySummary.Referer.Store(filteredReferer, 1)
  154. } else {
  155. c.DailySummary.Referer.Store(filteredReferer, rf.(int)+1)
  156. }
  157. //Record the UserAgent
  158. ua, ok := c.DailySummary.UserAgent.Load(ri.UserAgent)
  159. if !ok {
  160. c.DailySummary.UserAgent.Store(ri.UserAgent, 1)
  161. } else {
  162. c.DailySummary.UserAgent.Store(ri.UserAgent, ua.(int)+1)
  163. }
  164. //Record request URL, if it is a page
  165. ext := filepath.Ext(ri.RequestURL)
  166. if ext != "" && !isWebPageExtension(ext) {
  167. return
  168. }
  169. ru, ok := c.DailySummary.RequestURL.Load(ri.RequestURL)
  170. if !ok {
  171. c.DailySummary.RequestURL.Store(ri.RequestURL, 1)
  172. } else {
  173. c.DailySummary.RequestURL.Store(ri.RequestURL, ru.(int)+1)
  174. }
  175. }()
  176. //ADD MORE HERE IF NEEDED
  177. }
  178. // nightly task
  179. func (c *Collector) ScheduleResetRealtimeStats() chan bool {
  180. doneCh := make(chan bool)
  181. go func() {
  182. defer close(doneCh)
  183. for {
  184. // calculate duration until next midnight
  185. now := time.Now()
  186. // Get midnight of the next day in the local time zone
  187. midnight := time.Date(now.Year(), now.Month(), now.Day()+1, 0, 0, 0, 0, now.Location())
  188. // Calculate the duration until midnight
  189. duration := midnight.Sub(now)
  190. select {
  191. case <-time.After(duration):
  192. // store daily summary to database and reset summary
  193. c.SaveSummaryOfDay()
  194. c.DailySummary = NewDailySummary()
  195. case <-doneCh:
  196. // stop the routine
  197. return
  198. }
  199. }
  200. }()
  201. return doneCh
  202. }
  203. func NewDailySummary() *DailySummary {
  204. return &DailySummary{
  205. TotalRequest: 0,
  206. ErrorRequest: 0,
  207. ValidRequest: 0,
  208. ForwardTypes: &sync.Map{},
  209. RequestOrigin: &sync.Map{},
  210. RequestClientIp: &sync.Map{},
  211. Referer: &sync.Map{},
  212. UserAgent: &sync.Map{},
  213. RequestURL: &sync.Map{},
  214. }
  215. }
  216. func PrintDailySummary(summary *DailySummary) {
  217. summary.ForwardTypes.Range(func(key, value interface{}) bool {
  218. println(key.(string), value.(int))
  219. return true
  220. })
  221. summary.RequestOrigin.Range(func(key, value interface{}) bool {
  222. println(key.(string), value.(int))
  223. return true
  224. })
  225. summary.RequestClientIp.Range(func(key, value interface{}) bool {
  226. println(key.(string), value.(int))
  227. return true
  228. })
  229. summary.Referer.Range(func(key, value interface{}) bool {
  230. println(key.(string), value.(int))
  231. return true
  232. })
  233. summary.UserAgent.Range(func(key, value interface{}) bool {
  234. println(key.(string), value.(int))
  235. return true
  236. })
  237. summary.RequestURL.Range(func(key, value interface{}) bool {
  238. println(key.(string), value.(int))
  239. return true
  240. })
  241. }