From 005890d7856919fecc790907821bb596e3f17cc5 Mon Sep 17 00:00:00 2001 From: Thomas Kaul <4159106+dtslvr@users.noreply.github.com> Date: Sat, 13 Jan 2024 16:17:38 +0100 Subject: [PATCH] Improve extractNumberFromString() for international number formats (#2843) * Set up test * Add support for international formatted numbers * Expose locale in scraper configuration * Update changelog --- CHANGELOG.md | 2 + apps/api/src/app/info/info.service.ts | 6 +-- .../data-provider/manual/manual.service.ts | 16 ++++++-- .../symbol-profile/symbol-profile.service.ts | 1 + libs/common/src/lib/helper.spec.ts | 39 +++++++++++++++++++ libs/common/src/lib/helper.ts | 19 ++++++--- .../scraper-configuration.interface.ts | 1 + package.json | 5 ++- yarn.lock | 14 +++++++ 9 files changed, 90 insertions(+), 13 deletions(-) create mode 100644 libs/common/src/lib/helper.spec.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index aa8134e94..f1b1cb783 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Broken down the performance into asset and currency on the analysis page (experimental) +- Added support for international formatted numbers in the scraper configuration +- Added the attribute `locale` to the scraper configuration to parse the number ### Changed diff --git a/apps/api/src/app/info/info.service.ts b/apps/api/src/app/info/info.service.ts index a93070cd1..cf28d9d54 100644 --- a/apps/api/src/app/info/info.service.ts +++ b/apps/api/src/app/info/info.service.ts @@ -195,11 +195,11 @@ export class InfoService { const $ = cheerio.load(body); - return extractNumberFromString( - $( + return extractNumberFromString({ + value: $( `a[href="/ghostfolio/ghostfolio/graphs/contributors"] .Counter` ).text() - ); + }); } catch (error) { Logger.error(error, 'InfoService - GitHub'); diff --git a/apps/api/src/services/data-provider/manual/manual.service.ts b/apps/api/src/services/data-provider/manual/manual.service.ts index 4ac8bd668..77b96e3ac 100644 --- a/apps/api/src/services/data-provider/manual/manual.service.ts +++ b/apps/api/src/services/data-provider/manual/manual.service.ts @@ -236,6 +236,7 @@ export class ManualService implements DataProviderInterface { abortController.abort(); }, this.configurationService.get('REQUEST_TIMEOUT')); + let locale = scraperConfiguration.locale; const { body, headers } = await got(scraperConfiguration.url, { headers: scraperConfiguration.headers as Headers, // @ts-ignore @@ -248,13 +249,20 @@ export class ManualService implements DataProviderInterface { jsonpath.query(data, scraperConfiguration.selector)[0] ); - return extractNumberFromString(value); + return extractNumberFromString({ locale, value }); } else { const $ = cheerio.load(body); - return extractNumberFromString( - $(scraperConfiguration.selector).first().text() - ); + if (!locale) { + try { + locale = $('html').attr('lang'); + } catch {} + } + + return extractNumberFromString({ + locale, + value: $(scraperConfiguration.selector).first().text() + }); } } catch (error) { throw error; diff --git a/apps/api/src/services/symbol-profile/symbol-profile.service.ts b/apps/api/src/services/symbol-profile/symbol-profile.service.ts index 37671c4e2..5f808b3db 100644 --- a/apps/api/src/services/symbol-profile/symbol-profile.service.ts +++ b/apps/api/src/services/symbol-profile/symbol-profile.service.ts @@ -202,6 +202,7 @@ export class SymbolProfileService { defaultMarketPrice: scraperConfiguration.defaultMarketPrice as number, headers: scraperConfiguration.headers as ScraperConfiguration['headers'], + locale: scraperConfiguration.locale as string, selector: scraperConfiguration.selector as string, url: scraperConfiguration.url as string }; diff --git a/libs/common/src/lib/helper.spec.ts b/libs/common/src/lib/helper.spec.ts new file mode 100644 index 000000000..22a171168 --- /dev/null +++ b/libs/common/src/lib/helper.spec.ts @@ -0,0 +1,39 @@ +import { extractNumberFromString } from '@ghostfolio/common/helper'; + +describe('Helper', () => { + describe('Extract number from string', () => { + it('Get decimal number', async () => { + expect(extractNumberFromString({ value: '999.99' })).toEqual(999.99); + }); + + it('Get decimal number (with spaces)', async () => { + expect(extractNumberFromString({ value: ' 999.99 ' })).toEqual(999.99); + }); + + it('Get decimal number (with currency)', async () => { + expect(extractNumberFromString({ value: '999.99 CHF' })).toEqual(999.99); + }); + + it('Get decimal number (comma notation)', async () => { + expect( + extractNumberFromString({ locale: 'de-DE', value: '999,99' }) + ).toEqual(999.99); + }); + + it('Get decimal number with group (dot notation)', async () => { + expect( + extractNumberFromString({ locale: 'de-CH', value: '99’999.99' }) + ).toEqual(99999.99); + }); + + it('Get decimal number with group (comma notation)', async () => { + expect( + extractNumberFromString({ locale: 'de-DE', value: '99.999,99' }) + ).toEqual(99999.99); + }); + + it('Not a number', async () => { + expect(extractNumberFromString({ value: 'X' })).toEqual(NaN); + }); + }); +}); diff --git a/libs/common/src/lib/helper.ts b/libs/common/src/lib/helper.ts index 16d5d041b..6afecc398 100644 --- a/libs/common/src/lib/helper.ts +++ b/libs/common/src/lib/helper.ts @@ -1,4 +1,5 @@ import * as currencies from '@dinero.js/currencies'; +import { NumberParser } from '@internationalized/number'; import { DataSource, MarketData } from '@prisma/client'; import Big from 'big.js'; import { @@ -20,8 +21,6 @@ export const DATE_FORMAT = 'yyyy-MM-dd'; export const DATE_FORMAT_MONTHLY = 'MMMM yyyy'; export const DATE_FORMAT_YEARLY = 'yyyy'; -const NUMERIC_REGEXP = /[-]{0,1}[\d]*[.,]{0,1}[\d]+/g; - export function calculateBenchmarkTrend({ days, historicalData @@ -120,10 +119,20 @@ export function encodeDataSource(aDataSource: DataSource) { return undefined; } -export function extractNumberFromString(aString: string): number { +export function extractNumberFromString({ + locale = 'en-US', + value +}: { + locale?: string; + value: string; +}): number { try { - const [numberString] = aString.match(NUMERIC_REGEXP); - return parseFloat(numberString.trim()); + // Remove non-numeric characters (excluding international formatting characters) + const numericValue = value.replace(/[^\d.,'’\s]/g, ''); + + let parser = new NumberParser(locale); + + return parser.parse(numericValue); } catch { return undefined; } diff --git a/libs/common/src/lib/interfaces/scraper-configuration.interface.ts b/libs/common/src/lib/interfaces/scraper-configuration.interface.ts index 0446459ad..ef5506328 100644 --- a/libs/common/src/lib/interfaces/scraper-configuration.interface.ts +++ b/libs/common/src/lib/interfaces/scraper-configuration.interface.ts @@ -1,6 +1,7 @@ export interface ScraperConfiguration { defaultMarketPrice?: number; headers?: { [key: string]: string }; + locale?: string; selector: string; url: string; } diff --git a/package.json b/package.json index e6411ed72..0d37bcc08 100644 --- a/package.json +++ b/package.json @@ -44,7 +44,9 @@ "start:production": "yarn database:migrate && yarn database:seed && node main", "start:server": "nx run api:serve --watch", "start:storybook": "nx run ui:storybook", - "test": "npx dotenv-cli -e .env.example -- nx test", + "test": "yarn test:api && yarn test:common", + "test:api": "npx dotenv-cli -e .env.example -- nx test api", + "test:common": "npx dotenv-cli -e .env.example -- nx test common", "test:single": "nx run api:test --test-file portfolio-calculator-novn-buy-and-sell.spec.ts", "ts-node": "ts-node", "update": "nx migrate latest", @@ -71,6 +73,7 @@ "@dfinity/identity": "0.15.7", "@dfinity/principal": "0.15.7", "@dinero.js/currencies": "2.0.0-alpha.8", + "@internationalized/number": "3.5.0", "@nestjs/bull": "10.0.1", "@nestjs/cache-manager": "2.1.0", "@nestjs/common": "10.1.3", diff --git a/yarn.lock b/yarn.lock index 7854b53ee..fd7e3a549 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3063,6 +3063,13 @@ resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-2.0.1.tgz#e5211452df060fa8522b55c7b3c0c4d1981cb044" integrity sha512-dvuCeX5fC9dXgJn9t+X5atfmgQAzUOWqS1254Gh0m6i8wKd10ebXkfNKiRK+1GWi/yTvvLDHpoxLr0xxxeslWw== +"@internationalized/number@3.5.0": + version "3.5.0" + resolved "https://registry.yarnpkg.com/@internationalized/number/-/number-3.5.0.tgz#9de6018424b441a6545f209afa286ad7df4a2906" + integrity sha512-ZY1BW8HT9WKYvaubbuqXbbDdHhOUMfE2zHHFJeTppid0S+pc8HtdIxFxaYMsGjCb4UsF+MEJ4n2TfU7iHnUK8w== + dependencies: + "@swc/helpers" "^0.5.0" + "@ioredis/commands@^1.1.1": version "1.2.0" resolved "https://registry.yarnpkg.com/@ioredis/commands/-/commands-1.2.0.tgz#6d61b3097470af1fdbbe622795b8921d42018e11" @@ -6201,6 +6208,13 @@ resolved "https://registry.yarnpkg.com/@swc/counter/-/counter-0.1.2.tgz#bf06d0770e47c6f1102270b744e17b934586985e" integrity sha512-9F4ys4C74eSTEUNndnER3VJ15oru2NumfQxS8geE+f3eB5xvfxpWyqE5XlVnxb/R14uoXi6SLbBwwiDSkv+XEw== +"@swc/helpers@^0.5.0": + version "0.5.3" + resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.3.tgz#98c6da1e196f5f08f977658b80d6bd941b5f294f" + integrity sha512-FaruWX6KdudYloq1AHD/4nU+UsMTdNE8CKyrseXWEcgjDAbvkwJg2QGPAnfIJLIWsjZOSPLOAykK6fuYp4vp4A== + dependencies: + tslib "^2.4.0" + "@swc/types@^0.1.5": version "0.1.5" resolved "https://registry.yarnpkg.com/@swc/types/-/types-0.1.5.tgz#043b731d4f56a79b4897a3de1af35e75d56bc63a"