From 371f1dc4510f784cfa3bba0a105adec87c13e05d Mon Sep 17 00:00:00 2001 From: Hugo Persson Date: Wed, 3 Jan 2024 21:59:45 +0100 Subject: [PATCH] Feature/support rest api in scraper (#2810) * Support REST APIs in scraper * Update changelog --- CHANGELOG.md | 4 ++ apps/api/src/app/admin/admin.controller.ts | 4 +- .../data-provider/manual/manual.service.ts | 43 ++++++++++--------- package.json | 1 + yarn.lock | 42 +++++++++++++++++- 5 files changed, 70 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8017e3d0e..c8fe75066 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added support for REST APIs (`JSON`) via the scraper configuration + +### Changed + - Improved the user interface of the access table to share the portfolio ## 2.34.0 - 2024-01-02 diff --git a/apps/api/src/app/admin/admin.controller.ts b/apps/api/src/app/admin/admin.controller.ts index e769325ef..456550110 100644 --- a/apps/api/src/app/admin/admin.controller.ts +++ b/apps/api/src/app/admin/admin.controller.ts @@ -227,8 +227,8 @@ export class AdminController { @Param('symbol') symbol: string ): Promise<{ price: number }> { try { - const { headers, selector, url } = JSON.parse(data.scraperConfiguration); - const price = await this.manualService.test({ headers, selector, url }); + const scraperConfiguration = JSON.parse(data.scraperConfiguration); + const price = await this.manualService.test(scraperConfiguration); if (price) { return { price }; diff --git a/apps/api/src/services/data-provider/manual/manual.service.ts b/apps/api/src/services/data-provider/manual/manual.service.ts index d5a5c7eb3..4ac8bd668 100644 --- a/apps/api/src/services/data-provider/manual/manual.service.ts +++ b/apps/api/src/services/data-provider/manual/manual.service.ts @@ -12,6 +12,7 @@ import { extractNumberFromString, getYesterday } from '@ghostfolio/common/helper'; +import { ScraperConfiguration } from '@ghostfolio/common/interfaces'; import { Granularity } from '@ghostfolio/common/types'; import { Injectable, Logger } from '@nestjs/common'; import { DataSource, SymbolProfile } from '@prisma/client'; @@ -19,6 +20,7 @@ import * as cheerio from 'cheerio'; import { isUUID } from 'class-validator'; import { addDays, format, isBefore } from 'date-fns'; import got, { Headers } from 'got'; +import jsonpath from 'jsonpath'; @Injectable() export class ManualService implements DataProviderInterface { @@ -97,7 +99,7 @@ export class ManualService implements DataProviderInterface { return {}; } - const value = await this.scrape({ headers, selector, url }); + const value = await this.scrape(symbolProfile.scraperConfiguration); return { [symbol]: { @@ -220,23 +222,13 @@ export class ManualService implements DataProviderInterface { return { items }; } - public async test(params: any) { - return this.scrape({ - headers: params.headers, - selector: params.selector, - url: params.url - }); + public async test(scraperConfiguration: ScraperConfiguration) { + return this.scrape(scraperConfiguration); } - private async scrape({ - headers = {}, - selector, - url - }: { - headers?: Headers; - selector: string; - url: string; - }): Promise { + private async scrape( + scraperConfiguration: ScraperConfiguration + ): Promise { try { const abortController = new AbortController(); @@ -244,15 +236,26 @@ export class ManualService implements DataProviderInterface { abortController.abort(); }, this.configurationService.get('REQUEST_TIMEOUT')); - const { body } = await got(url, { - headers, + const { body, headers } = await got(scraperConfiguration.url, { + headers: scraperConfiguration.headers as Headers, // @ts-ignore signal: abortController.signal }); - const $ = cheerio.load(body); + if (headers['content-type'] === 'application/json') { + const data = JSON.parse(body); + const value = String( + jsonpath.query(data, scraperConfiguration.selector)[0] + ); - return extractNumberFromString($(selector).first().text()); + return extractNumberFromString(value); + } else { + const $ = cheerio.load(body); + + return extractNumberFromString( + $(scraperConfiguration.selector).first().text() + ); + } } catch (error) { throw error; } diff --git a/package.json b/package.json index 8b761b2ac..d9379fb34 100644 --- a/package.json +++ b/package.json @@ -110,6 +110,7 @@ "helmet": "7.0.0", "http-status-codes": "2.3.0", "ionicons": "7.1.0", + "jsonpath": "1.1.1", "lodash": "4.17.21", "marked": "9.1.6", "ms": "3.0.0-canary.1", diff --git a/yarn.lock b/yarn.lock index ce4304e00..7854b53ee 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10959,6 +10959,18 @@ escape-string-regexp@^5.0.0: resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz#4683126b500b61762f2dbebace1806e8be31b1c8" integrity sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw== +escodegen@^1.8.1: + version "1.14.3" + resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-1.14.3.tgz#4e7b81fba61581dc97582ed78cab7f0e8d63f503" + integrity sha512-qFcX0XJkdg+PB3xjZZG/wKSuT1PnQWx57+TVSjIMmILd2yC/6ByYElPwJnslDsuWuSAp4AwJGumarAAmJch5Kw== + dependencies: + esprima "^4.0.1" + estraverse "^4.2.0" + esutils "^2.0.2" + optionator "^0.8.1" + optionalDependencies: + source-map "~0.6.1" + escodegen@^2.0.0: version "2.1.0" resolved "https://registry.yarnpkg.com/escodegen/-/escodegen-2.1.0.tgz#ba93bbb7a43986d29d6041f99f5262da773e2e17" @@ -11187,6 +11199,11 @@ espree@^9.0.0, espree@^9.4.0: acorn-jsx "^5.3.2" eslint-visitor-keys "^3.4.1" +esprima@1.2.2: + version "1.2.2" + resolved "https://registry.yarnpkg.com/esprima/-/esprima-1.2.2.tgz#76a0fd66fcfe154fd292667dc264019750b1657b" + integrity sha512-+JpPZam9w5DuJ3Q67SqsMGtiHKENSMRVoxvArfJZK01/BfLEObtZ6orJa/MtoGNR/rfMgp5837T41PAmTwAv/A== + esprima@^4.0.0, esprima@^4.0.1, esprima@~4.0.0: version "4.0.1" resolved "https://registry.yarnpkg.com/esprima/-/esprima-4.0.1.tgz#13b04cdb3e6c5d19df91ab6987a8695619b0aa71" @@ -11206,7 +11223,7 @@ esrecurse@^4.1.0, esrecurse@^4.3.0: dependencies: estraverse "^5.2.0" -estraverse@^4.1.1: +estraverse@^4.1.1, estraverse@^4.2.0: version "4.3.0" resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-4.3.0.tgz#398ad3f3c5a24948be7725e83d11a7de28cdbd1d" integrity sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw== @@ -14245,6 +14262,15 @@ jsonparse@^1.3.1: resolved "https://registry.yarnpkg.com/jsonparse/-/jsonparse-1.3.1.tgz#3f4dae4a91fac315f71062f8521cc239f1366280" integrity sha512-POQXvpdL69+CluYsillJ7SUhKvytYjW9vG/GKpnf+xP8UWgYEM/RaMzHHofbALDiKbbP1W8UEYmgGl39WkPZsg== +jsonpath@1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/jsonpath/-/jsonpath-1.1.1.tgz#0ca1ed8fb65bb3309248cc9d5466d12d5b0b9901" + integrity sha512-l6Cg7jRpixfbgoWgkrl77dgEj8RPvND0wMH6TwQmi9Qs4TFfS9u5cUFnbeKTwj5ga5Y3BTGGNI28k117LJ009w== + dependencies: + esprima "1.2.2" + static-eval "2.0.2" + underscore "1.12.1" + jsonwebtoken@9.0.0: version "9.0.0" resolved "https://registry.yarnpkg.com/jsonwebtoken/-/jsonwebtoken-9.0.0.tgz#d0faf9ba1cc3a56255fe49c0961a67e520c1926d" @@ -16051,7 +16077,7 @@ opn@5.3.0: dependencies: is-wsl "^1.1.0" -optionator@^0.8.2: +optionator@^0.8.1, optionator@^0.8.2: version "0.8.3" resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.8.3.tgz#84fa1d036fe9d3c7e21d99884b601167ec8fb495" integrity sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA== @@ -18446,6 +18472,13 @@ standard-as-callback@^2.1.0: resolved "https://registry.yarnpkg.com/standard-as-callback/-/standard-as-callback-2.1.0.tgz#8953fc05359868a77b5b9739a665c5977bb7df45" integrity sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A== +static-eval@2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/static-eval/-/static-eval-2.0.2.tgz#2d1759306b1befa688938454c546b7871f806a42" + integrity sha512-N/D219Hcr2bPjLxPiV+TQE++Tsmrady7TqAJugLy7Xk1EumfDWS/f5dtBbkRCGE7wKKXuYockQoj8Rm2/pVKyg== + dependencies: + escodegen "^1.8.1" + static-extend@^0.1.1: version "0.1.2" resolved "https://registry.yarnpkg.com/static-extend/-/static-extend-0.1.2.tgz#60809c39cbff55337226fd5e0b520f341f1fb5c6" @@ -19380,6 +19413,11 @@ unbox-primitive@^1.0.2: has-symbols "^1.0.3" which-boxed-primitive "^1.0.2" +underscore@1.12.1: + version "1.12.1" + resolved "https://registry.yarnpkg.com/underscore/-/underscore-1.12.1.tgz#7bb8cc9b3d397e201cf8553336d262544ead829e" + integrity sha512-hEQt0+ZLDVUMhebKxL4x1BTtDY7bavVofhZ9KZ4aI26X9SRaE+Y3m83XUL1UP2jn8ynjndwCCpEHdUG+9pP1Tw== + undici-types@~5.26.4: version "5.26.5" resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-5.26.5.tgz#bcd539893d00b56e964fd2657a4866b221a65617"