From 903d5713fc69fa8c944e8611702af9473508a05c Mon Sep 17 00:00:00 2001
From: Jason Kulatunga <jason@thesparktree.com>
Date: Sun, 21 Nov 2021 14:39:39 -0800
Subject: [PATCH] fixes for tests.

---
 docs/DOWNSAMPLING.md                          | 42 +++++++++++++++++++
 .../pkg/database/scrutiny_repository.go       | 41 ++++++++++--------
 2 files changed, 66 insertions(+), 17 deletions(-)
 create mode 100644 docs/DOWNSAMPLING.md

diff --git a/docs/DOWNSAMPLING.md b/docs/DOWNSAMPLING.md
new file mode 100644
index 0000000..25a202a
--- /dev/null
+++ b/docs/DOWNSAMPLING.md
@@ -0,0 +1,42 @@
+# Downsampling
+
+Scrutiny collects a lot of data, which can cause the database to grow unbounded.
+
+- Smart data
+- Smart test data
+- Temperature data
+- Disk metrics (capacity/usage)
+- etc
+
+This data must be accurate in the short term, and is useful for doing trend analysis in the long term.
+However, for trend analysis we only need aggregate data; individual data points are not as useful.
+
+Scrutiny will automatically downsample data on a schedule to ensure that the database size stays reasonable, while still
+ensuring historical data is present for comparisons.
+
+
+| Bucket Name | Retention Period | Downsampling Range | Downsampling Aggregation Window | Downsampling Cron | Comments |
+| --- | --- | --- | --- | --- | --- |
+| `metrics` | 15 days | `-2w -1w` | `1w` | weekly on Sunday at 1:00am | |
+| `metrics_weekly` | 9 weeks | `-2mo -1mo` | `1mo` | monthly on the first day of the month at 1:30am | |
+| `metrics_monthly` | 25 months | `-2y -1y` | `1y` | yearly on the first day of the year at 2:00am | |
+| `metrics_yearly` | forever | - | - | - | |
+
+
+After 5 months, here's how many data points should exist in each bucket for one disk:
+
+| Bucket Name | Datapoints | Comments |
+| --- | --- | --- |
+| `metrics` | 15 | 7 daily data points, up to 7 pending data points, 1 buffer data point |
+| `metrics_weekly` | 9 | 4 aggregated weekly data points, 4 pending data points, 1 buffer data point |
+| `metrics_monthly` | 3 | 3 aggregated monthly data points |
+| `metrics_yearly` | 0 | |
+
+After 5 years, here's how many data points should exist in each bucket for one disk:
+
+| Bucket Name | Datapoints | Comments |
+| --- | --- | --- |
+| `metrics` | - | - |
+| `metrics_weekly` | - | - |
+| `metrics_monthly` | - | - |
+| `metrics_yearly` | - | - |
diff --git a/webapp/backend/pkg/database/scrutiny_repository.go b/webapp/backend/pkg/database/scrutiny_repository.go
index e5429f8..1a99257 100644
--- a/webapp/backend/pkg/database/scrutiny_repository.go
+++ b/webapp/backend/pkg/database/scrutiny_repository.go
@@ -295,8 +295,7 @@ func (sr *scrutinyRepository) DownsampleScript(aggregationType string) string {
   smart_data = from(bucket: sourceBucket)
   |> range(start: rangeStart, stop: rangeEnd)
   |> filter(fn: (r) => r["_measurement"] == "smart" )
-  |> filter(fn: (r) => r["_field"] !~ /(raw_string|_measurement|device_protocol|status_reason|device_wwn|attribute_id|when_failed)/)
-  |> last()
+  |> filter(fn: (r) => r["_field"] !~ /(_measurement|device_protocol|device_wwn|attribute_id|raw_string|status_reason|when_failed)/)
   |> yield(name: "last")
 
   smart_data
@@ -306,7 +305,7 @@ func (sr *scrutinyRepository) DownsampleScript(aggregationType string) string {
   temp_data = from(bucket: sourceBucket)
   |> range(start: rangeStart, stop: rangeEnd)
   |> filter(fn: (r) => r["_measurement"] == "temp")
-  |> last()
+  |> toInt()
   |> yield(name: "mean")
 
   temp_data
@@ -704,20 +703,28 @@ func (sr *scrutinyRepository) lookupNestedDurationKeys(durationKey string) []str
 
 func (sr *scrutinyRepository) aggregateTempQuery(durationKey string) string {
 
-	//TODO: change the query range to a variable.
-	//queryStr := fmt.Sprintf(`
-	//import "influxdata/influxdb/schema"
-	//from(bucket: "%s")
-	//|> range(start: %s, stop: now())
-	//|> filter(fn: (r) => r["_measurement"] == "temp" )
-	//|> aggregateWindow(every: 1h, fn: mean, createEmpty: false)
-	//|> schema.fieldsAsCols()
-	//|> group(columns: ["device_wwn"])
-	//|> yield(name: "last")
-	//	`,
-	//	sr.lookupBucketName(durationKey),
-	//	sr.lookupDuration(durationKey),
-	//)
+	/*
+		import "influxdata/influxdb/schema"
+		weekData = from(bucket: "metrics")
+		  |> range(start: -1w, stop: now())
+		  |> filter(fn: (r) => r["_measurement"] == "temp" )
+		  |> aggregateWindow(every: 1h, fn: mean, createEmpty: false)
+		  |> group(columns: ["device_wwn"])
+		  |> toInt()
+
+		monthData = from(bucket: "metrics_weekly")
+		  |> range(start: -1mo, stop: now())
+		  |> filter(fn: (r) => r["_measurement"] == "temp" )
+		  |> aggregateWindow(every: 1h, fn: mean, createEmpty: false)
+		  |> group(columns: ["device_wwn"])
+		  |> toInt()
+
+		union(tables: [weekData, monthData])
+		  |> group(columns: ["device_wwn"])
+		  |> sort(columns: ["_time"], desc: false)
+		  |> schema.fieldsAsCols()
+
+	*/
 
 	partialQueryStr := []string{`import "influxdata/influxdb/schema"`}