From ffa2310630fd6dc35ea0616022be32f6219f1d73 Mon Sep 17 00:00:00 2001 From: Paul Pfeister Date: Mon, 6 May 2024 00:49:13 -0400 Subject: [PATCH] Add LinkedIn LinkedIn requires users to be signed in for normal visitation, but certain high-trust crawlers are allowed as well. Here, we masquerade as Googlebot to get past the HTTP 999 status that LinkedIn otherwise returns. --- removed_sites.json | 7 ------- removed_sites.md | 7 ------- sherlock/resources/data.json | 10 ++++++++++ 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/removed_sites.json b/removed_sites.json index 87d10c5..365ea2b 100644 --- a/removed_sites.json +++ b/removed_sites.json @@ -87,13 +87,6 @@ "urlMain": "https://kiwifarms.net/", "username_claimed": "blue" }, - "Linkedin": { - "errorMsg": "could not be found", - "errorType": "message", - "url": "https://www.linkedin.com/in/{}", - "urlMain": "https://www.linkedin.com/", - "username_claimed": "alex" - }, "NPM-Package": { "errorType": "status_code", "url": "https://www.npmjs.com/package/{}", diff --git a/removed_sites.md b/removed_sites.md index e946120..dcdddfb 100644 --- a/removed_sites.md +++ b/removed_sites.md @@ -339,13 +339,6 @@ user names were available. }, ``` -## LinkedIn - -This was attempted to be added around 2019-08-26, but the pull request was never merged. -It turns out that LinkedIn requires that you have an account before they will let you -check for other account. So, this site will not work with the current design of -Sherlock. - ## StreamMe On 2019-04-07, I get a Timed Out message from the website.
It has not diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json index d698d52..4ff705b 100644 --- a/sherlock/resources/data.json +++ b/sherlock/resources/data.json @@ -90,6 +90,16 @@ "urlMain": "https://www.airliners.net/", "username_claimed": "yushinlin" }, + "LinkedIn": { + "url": "https://linkedin.com/in/{}", + "urlMain": "https://linkedin.com", + "request_method": "GET", + "errorType": "status_code", + "headers": { + "User-Agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36" + }, + "username_claimed": "paulpfeister" + }, "Alik.cz": { "errorType": "status_code", "url": "https://www.alik.cz/u/{}",