fixes for tests.

3 years ago · 903d5713fc
parent 0872da57d7
commit 903d5713fc
2 changed files with 66 additions and 17 deletions
--- a/docs/DOWNSAMPLING.md
+++ b/docs/DOWNSAMPLING.md
@ -0,0 +1,42 @@
+# Downsampling
+
+Scrutiny collects a lot of data, which can cause the database to grow unbounded.
+
+- Smart data
+- Smart test data
+- Temperature data
+- Disk metrics (capacity/usage)
+- etc
+
+This data must be accurate in the short term, and is useful for doing trend analysis in the long term.
+However, for trend analysis we only need aggregate data; individual data points are not as useful.
+
+Scrutiny will automatically downsample data on a schedule to ensure that the database size stays reasonable, while still
+ensuring historical data is present for comparisons.
+
+
+| Bucket Name | Retention Period | Downsampling Range | Downsampling Aggregation Window | Downsampling Cron | Comments |
+| --- | --- | --- | --- | --- | --- |
+| `metrics` | 15 days | `-2w -1w` | `1w` | weekly on Sunday at 1:00am | |
+| `metrics_weekly` | 9 weeks | `-2mo -1mo` | `1mo` | monthly on the first day of the month at 1:30am | |
+| `metrics_monthly` | 25 months | `-2y -1y` | `1y` | yearly on the first day of the year at 2:00am | |
+| `metrics_yearly` | forever | - | - | - | |
+
+
+After 5 months, here's how many data points should exist in each bucket for one disk:
+
+| Bucket Name | Datapoints | Comments |
+| --- | --- | --- |
+| `metrics` | 15 | 7 daily data points, up to 7 pending data points, 1 buffer data point |
+| `metrics_weekly` | 9 | 4 aggregated weekly data points, 4 pending datapoints, 1 buffer data point |
+| `metrics_monthly` | 3 | 3 aggregated monthly data points | 
+| `metrics_yearly` | 0 | |
+
+After 5 years, here's how many data points should exist in each bucket for one disk:
+
+| Bucket Name | Datapoints | Comments |
+| --- | --- | --- |
+| `metrics` | - | - |
+| `metrics_weekly` | - | - |
+| `metrics_monthly` | - | - |
+| `metrics_yearly` | - | - |
--- a/webapp/backend/pkg/database/scrutiny_repository.go
+++ b/webapp/backend/pkg/database/scrutiny_repository.go
@ -295,8 +295,7 @@ func (sr *scrutinyRepository) DownsampleScript(aggregationType string) string {
  smart_data = from(bucket: sourceBucket)
  |> range(start: rangeStart, stop: rangeEnd)
  |> filter(fn: (r) => r["_measurement"] == "smart" )
-  |> filter(fn: (r) => r["_field"] !~ /(raw_string|_measurement|device_protocol|status_reason|device_wwn|attribute_id|when_failed)/)
-  |> last()
+  |> filter(fn: (r) => r["_field"] !~ /(_measurement|device_protocol|device_wwn|attribute_id|raw_string|status_reason|when_failed)/)
  |> yield(name: "last")

  smart_data
@ -306,7 +305,7 @@ func (sr *scrutinyRepository) DownsampleScript(aggregationType string) string {
  temp_data = from(bucket: sourceBucket)
  |> range(start: rangeStart, stop: rangeEnd)
  |> filter(fn: (r) => r["_measurement"] == "temp")
-  |> last()
+  |> toInt()
  |> yield(name: "mean")

  temp_data
@ -704,20 +703,28 @@ func (sr *scrutinyRepository) lookupNestedDurationKeys(durationKey string) []str

 func (sr *scrutinyRepository) aggregateTempQuery(durationKey string) string {

-	//TODO: change the query range to a variable.
-	//queryStr := fmt.Sprintf(`
-	//import "influxdata/influxdb/schema"
-	//from(bucket: "%s")
-	//|> range(start: %s, stop: now())
-	//|> filter(fn: (r) => r["_measurement"] == "temp" )
-	//|> aggregateWindow(every: 1h, fn: mean, createEmpty: false)
-	//|> schema.fieldsAsCols()
-	//|> group(columns: ["device_wwn"])
-	//|> yield(name: "last")
-	//	`,
-	//	sr.lookupBucketName(durationKey),
-	//	sr.lookupDuration(durationKey),
-	//)
+	/*
+		import "influxdata/influxdb/schema"
+		weekData = from(bucket: "metrics")
+		  |> range(start: -1w, stop: now())
+		  |> filter(fn: (r) => r["_measurement"] == "temp" )
+		  |> aggregateWindow(every: 1h, fn: mean, createEmpty: false)
+		  |> group(columns: ["device_wwn"])
+		  |> toInt()
+
+		monthData = from(bucket: "metrics_weekly")
+		  |> range(start: -1mo, stop: now())
+		  |> filter(fn: (r) => r["_measurement"] == "temp" )
+		  |> aggregateWindow(every: 1h, fn: mean, createEmpty: false)
+		  |> group(columns: ["device_wwn"])
+		  |> toInt()
+
+		union(tables: [weekData, monthData])
+		  |> group(columns: ["device_wwn"])
+		  |> sort(columns: ["_time"], desc: false)
+		  |> schema.fieldsAsCols()
+
+	*/

 	partialQueryStr := []string{`import "influxdata/influxdb/schema"`}