From ffa2310630fd6dc35ea0616022be32f6219f1d73 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 6 May 2024 00:49:13 -0400 Subject: [PATCH 1/3] Add LinkedIn LinkedIn requires users to be signed in for normal visitation, but certain high-trust crawlers are allowed as well. Here, we masquerade as Googlebot to get past the 999. --- removed_sites.json | 7 ------- removed_sites.md | 7 ------- sherlock/resources/data.json | 10 ++++++++++ 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/removed_sites.json b/removed_sites.json index 87d10c5..365ea2b 100644 --- a/removed_sites.json +++ b/removed_sites.json @@ -87,13 +87,6 @@ "urlMain": "https://kiwifarms.net/", "username_claimed": "blue" }, - "Linkedin": { - "errorMsg": "could not be found", - "errorType": "message", - "url": "https://www.linkedin.com/in/{}", - "urlMain": "https://www.linkedin.com/", - "username_claimed": "alex" - }, "NPM-Package": { "errorType": "status_code", "url": "https://www.npmjs.com/package/{}", diff --git a/removed_sites.md b/removed_sites.md index e946120..dcdddfb 100644 --- a/removed_sites.md +++ b/removed_sites.md @@ -339,13 +339,6 @@ user names were available. }, ``` -## LinkedIn - -This was attempted to be added around 2019-08-26, but the pull request was never merged. -It turns out that LinkedIn requires that you have an account before they will let you -check for other account. So, this site will not work with the current design of -Sherlock. - ## StreamMe On 2019-04-07, I get a Timed Out message from the website. It has not diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json index d698d52..4ff705b 100644 --- a/sherlock/resources/data.json +++ b/sherlock/resources/data.json @@ -90,6 +90,16 @@ "urlMain": "https://www.airliners.net/", "username_claimed": "yushinlin" }, + "LinkedIn": { + "url": "https://linkedin.com/in/{}", + "urlMain": "https://linkedin.com", + "request_method": "GET", + "errorType": "status_code", + "headers": { + "User-Agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36" + }, + "username_claimed": "paulpfeister" + }, "Alik.cz": { "errorType": "status_code", "url": "https://www.alik.cz/u/{}", From d8aaeea1684d774de793d031eb16f100054324ad Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 6 May 2024 01:09:31 -0400 Subject: [PATCH 2/3] Add regex --- sherlock/resources/data.json | 1 + 1 file changed, 1 insertion(+) diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json index 4ff705b..75450f4 100644 --- a/sherlock/resources/data.json +++ b/sherlock/resources/data.json @@ -93,6 +93,7 @@ "LinkedIn": { "url": "https://linkedin.com/in/{}", "urlMain": "https://linkedin.com", + "regexCheck": "^[a-zA-Z0-9]{3,100}$", "request_method": "GET", "errorType": "status_code", "headers": { From 31ab96675caae23c18f80dc724f2db4fbe9b78f3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 6 May 2024 10:13:08 +0000 Subject: [PATCH 3/3] Updated Site List --- sherlock/resources/data.json | 22 +++++++++++----------- sites.md | 3 ++- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json index f6b495d..6d0e5ff 100644 --- a/sherlock/resources/data.json +++ b/sherlock/resources/data.json @@ -90,17 +90,6 @@ "urlMain": "https://www.airliners.net/", "username_claimed": "yushinlin" }, - "LinkedIn": { - "url": "https://linkedin.com/in/{}", - "urlMain": "https://linkedin.com", - "regexCheck": "^[a-zA-Z0-9]{3,100}$", - "request_method": "GET", - "errorType": "status_code", - "headers": { - "User-Agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36" - }, - "username_claimed": "paulpfeister" - }, "Alik.cz": { "errorType": "status_code", "url": "https://www.alik.cz/u/{}", @@ -1265,6 +1254,17 @@ "urlMain": "https://lichess.org", "username_claimed": "blue" }, + "LinkedIn": { + "errorType": "status_code", + "headers": { + "User-Agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36" + }, + "regexCheck": "^[a-zA-Z0-9]{3,100}$", + "request_method": "GET", + "url": "https://linkedin.com/in/{}", + "urlMain": "https://linkedin.com", + "username_claimed": "paulpfeister" + }, "Linktree": { "errorMsg": "\"statusCode\":404", "errorType": "message", diff --git a/sites.md b/sites.md index 730105b..393f576 100644 --- a/sites.md +++ b/sites.md @@ -1,4 +1,4 @@ -## List Of Supported Sites (400 Sites In Total!) +## List Of Supported Sites (401 Sites In Total!) 1. ![](https://www.google.com/s2/favicons?domain=https://www.1337x.to/) [1337x](https://www.1337x.to/) 1. ![](https://www.google.com/s2/favicons?domain=https://2Dimensions.com/) [2Dimensions](https://2Dimensions.com/) 1. ![](https://www.google.com/s2/favicons?domain=http://forum.3dnews.ru/) [3dnews](http://forum.3dnews.ru/) @@ -180,6 +180,7 @@ 1. ![](https://www.google.com/s2/favicons?domain=https://www.lesswrong.com/) [LessWrong](https://www.lesswrong.com/) 1. ![](https://www.google.com/s2/favicons?domain=https://letterboxd.com/) [Letterboxd](https://letterboxd.com/) 1. ![](https://www.google.com/s2/favicons?domain=https://lichess.org) [Lichess](https://lichess.org) +1. ![](https://www.google.com/s2/favicons?domain=https://linkedin.com) [LinkedIn](https://linkedin.com) 1. ![](https://www.google.com/s2/favicons?domain=https://linktr.ee/) [Linktree](https://linktr.ee/) 1. ![](https://www.google.com/s2/favicons?domain=https://listed.to/) [Listed](https://listed.to/) 1. ![](https://www.google.com/s2/favicons?domain=https://www.livejournal.com/) [LiveJournal](https://www.livejournal.com/)