fix naming issues

4 years ago · e113947f35
parent 319b34831a
commit e113947f35
7 changed files with 747 additions and 7 deletions
--- a/go.mod
+++ b/go.mod
@ -14,7 +14,7 @@ require (
 	go.uber.org/multierr v1.6.0 // indirect
 	go.uber.org/zap v1.16.0
 	golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a
-	golang.org/x/net v0.0.0-20201002202402-0a1ea396d57c // indirect
+	golang.org/x/net v0.0.0-20201002202402-0a1ea396d57c
 	gorm.io/driver/sqlite v1.1.3
 	gorm.io/gorm v1.20.2
 )
--- a/internal/sanitize/.gitignore
+++ b/internal/sanitize/.gitignore
@ -0,0 +1,22 @@
 # Compiled Object files, Static and Dynamic libs (Shared Objects)
 *.o
 *.a
 *.so
 # Folders
 _obj
 _test
 # Architecture specific extensions/prefixes
 *.[568vq]
 [568vq].out
 *.cgo1.go
 *.cgo2.c
 _cgo_defun.c
 _cgo_gotypes.go
 _cgo_export.*
 _testmain.go
 *.exe
--- a/internal/sanitize/LICENSE
+++ b/internal/sanitize/LICENSE
@ -0,0 +1,27 @@
 Copyright (c) 2017 Mechanism Design. All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are
 met:
   * Redistributions of source code must retain the above copyright
 notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above
 copyright notice, this list of conditions and the following disclaimer
 in the documentation and/or other materials provided with the
 distribution.
   * Neither the name of Google Inc. nor the names of its
 contributors may be used to endorse or promote products derived from
 this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/internal/sanitize/README.md
+++ b/internal/sanitize/README.md
@ -0,0 +1,62 @@
 sanitize [![GoDoc](https://godoc.org/github.com/kennygrant/sanitize?status.svg)](https://godoc.org/github.com/kennygrant/sanitize) [![Go Report Card](https://goreportcard.com/badge/github.com/kennygrant/sanitize)](https://goreportcard.com/report/github.com/kennygrant/sanitize) [![CircleCI](https://circleci.com/gh/kennygrant/sanitize.svg?style=svg)](https://circleci.com/gh/kennygrant/sanitize)
 ========
 Package sanitize provides functions to sanitize html and paths with go (golang).
 FUNCTIONS
 ```go
 sanitize.Accents(s string) string
 ```
 Accents replaces a set of accented characters with ascii equivalents.
 ```go
 sanitize.BaseName(s string) string
 ```
 BaseName makes a string safe to use in a file name, producing a sanitized basename replacing . or / with -. Unlike Name no attempt is made to normalise text as a path.
 ```go
 sanitize.HTML(s string) string
 ```
 HTML strips html tags with a very simple parser, replaces common entities, and escapes < and > in the result. The result is intended to be used as plain text.
 ```go
 sanitize.HTMLAllowing(s string, args...[]string) (string, error)
 ```
 HTMLAllowing parses html and allows certain tags and attributes from the lists optionally specified by args - args[0] is a list of allowed tags, args[1] is a list of allowed attributes. If either is missing, the default set is used.
 ```go
 sanitize.Name(s string) string
 ```
 Name makes a string safe to use in a file name by first finding the path basename, then replacing non-ascii characters.
 ```go
 sanitize.Path(s string) string
 ```
 Path makes a string safe to use as a URL path.
 Changes
 -------
 Version 1.2
 Adjusted HTML function to avoid linter warning
 Added more tests from https://githubengineering.com/githubs-post-csp-journey/
 Changed name of license file
 Added badges and change log to readme
 Version 1.1
 Fixed typo in comments.
 Merge pull request from Povilas Balzaravicius Pawka 
 - replace br tags with newline even when they contain a space
 Version 1.0
 First release
--- a/internal/sanitize/sanitize.go
+++ b/internal/sanitize/sanitize.go
@ -0,0 +1,388 @@
 // Package sanitize provides functions for sanitizing text.
 package sanitize
 import (
 	"bytes"
 	"html"
 	"html/template"
 	"io"
 	"path"
 	"regexp"
 	"strings"
 	parser "golang.org/x/net/html"
 )
 var (
 	// ignoreTags lists the elements that HTMLAllowing treats as "ignored": when a start tag
 	// with one of these names is not emitted as an allowed tag, output is suppressed until an
 	// end tag or self-closing tag with the same name is seen.
 	ignoreTags = []string{"title", "script", "style", "iframe", "frame", "frameset", "noframes", "noembed", "embed", "applet", "object", "base"}
 	// defaultTags is the tag whitelist HTMLAllowing uses when the caller supplies no tag list.
 	defaultTags = []string{"h1", "h2", "h3", "h4", "h5", "h6", "div", "span", "hr", "p", "br", "b", "i", "strong", "em", "ol", "ul", "li", "a", "img", "pre", "code", "blockquote", "article", "section"}
 	// defaultAttributes is the attribute whitelist HTMLAllowing uses when the caller supplies no attribute list.
 	defaultAttributes = []string{"id", "class", "src", "href", "title", "alt", "name", "rel"}
 )
 // HTMLAllowing sanitizes the html in s, emitting only whitelisted tags and attributes.
 // args[0], when present, replaces defaultTags as the list of allowed tags; args[1], when
 // present, replaces defaultAttributes as the list of allowed attributes.
 // Start tags and self-closing tags that are allowed are written with their attributes
 // filtered by cleanAttributes; allowed end tags are written without attributes.
 // A start tag listed in ignoreTags that is not emitted switches output off until an end or
 // self-closing tag with the same name arrives. Comments and doctypes are always dropped.
 // It returns the sanitized text, or "" and the tokenizer error for any error other than io.EOF.
 func HTMLAllowing(s string, args ...[]string) (string, error) {
 	tags, attrs := defaultTags, defaultAttributes
 	if len(args) > 0 {
 		tags = args[0]
 	}
 	if len(args) > 1 {
 		attrs = args[1]
 	}
 	var out bytes.Buffer
 	// skipping holds the name of the ignored element being dropped, or "" while output is enabled.
 	skipping := ""
 	z := parser.NewTokenizer(strings.NewReader(s))
 	for {
 		kind := z.Next()
 		tok := z.Token()
 		if kind == parser.ErrorToken {
 			// io.EOF is the normal end of input; anything else is a real failure.
 			if err := z.Err(); err != io.EOF {
 				return "", err
 			}
 			return out.String(), nil
 		}
 		switch kind {
 		case parser.StartTagToken:
 			switch {
 			case skipping == "" && includes(tags, tok.Data):
 				tok.Attr = cleanAttributes(tok.Attr, attrs)
 				out.WriteString(tok.String())
 			case includes(ignoreTags, tok.Data):
 				skipping = tok.Data
 			}
 		case parser.SelfClosingTagToken:
 			switch {
 			case skipping == "" && includes(tags, tok.Data):
 				tok.Attr = cleanAttributes(tok.Attr, attrs)
 				out.WriteString(tok.String())
 			case tok.Data == skipping:
 				skipping = ""
 			}
 		case parser.EndTagToken:
 			switch {
 			case skipping == "" && includes(tags, tok.Data):
 				// End tags never carry attributes in the output.
 				tok.Attr = []parser.Attribute{}
 				out.WriteString(tok.String())
 			case tok.Data == skipping:
 				skipping = ""
 			}
 		case parser.TextToken:
 			// Text passes through unless it sits inside an ignored element.
 			if skipping == "" {
 				out.WriteString(tok.String())
 			}
 		}
 		// Comment, doctype and any other token kinds are intentionally dropped.
 	}
 }
 // HTML converts s to plain text: tags are stripped with a very simple scanner, a handful of
 // common entities are replaced, and the result is escaped with template.HTMLEscapeString.
 // The returned text may therefore still contain entities; only quotes and an ampersand
 // followed by a space are turned back into literal characters.
 func HTML(s string) (output string) {
 	output = s
 	// Strings without any angle bracket skip the tag-stripping pass entirely.
 	if strings.ContainsAny(s, "<>") {
 		// Newlines have no meaning between tags (pre sections lose their formatting),
 		// so drop them before turning paragraph ends and br tags into real newlines.
 		flat := strings.Replace(s, "\n", "", -1)
 		for _, tag := range [...]string{"</p>", "<br>", "</br>", "<br/>", "<br />"} {
 			flat = strings.Replace(flat, tag, "\n", -1)
 		}
 		// Copy every rune that is outside a <...> span.
 		var text strings.Builder
 		insideTag := false
 		for _, r := range flat {
 			if r == '<' {
 				insideTag = true
 				continue
 			}
 			if r == '>' {
 				insideTag = false
 				continue
 			}
 			if !insideTag {
 				text.WriteRune(r)
 			}
 		}
 		output = text.String()
 	}
 	// Replace a few common harmless entities to get closer to plain text.
 	for _, pair := range [...][2]string{
 		{"&#8216;", "'"},
 		{"&#8217;", "'"},
 		{"&#8220;", "\""},
 		{"&#8221;", "\""},
 		{"&nbsp;", " "},
 		{"&quot;", "\""},
 		{"&apos;", "'"},
 	} {
 		output = strings.Replace(output, pair[0], pair[1], -1)
 	}
 	// Translate remaining entities (for example accents) and then escape the text again
 	// in case any markup survived the scan above.
 	output = template.HTMLEscapeString(html.UnescapeString(output))
 	// Undo the escaping of quotes and of an ampersand that is followed by a space.
 	for _, pair := range [...][2]string{
 		{"&#34;", "\""},
 		{"&#39;", "'"},
 		{"&amp; ", "& "},
 		{"&amp;amp; ", "& "},
 	} {
 		output = strings.Replace(output, pair[0], pair[1], -1)
 	}
 	return output
 }
 // illegalPath matches every character other than ASCII letters and digits, '~', '-', '.' and '/'.
 // It is deliberately restrictive because it targets ascii url slugs.
 var illegalPath = regexp.MustCompile(`[^[:alnum:]\~\-\./]`)
 // Path prepares s for use as a URL path: it lowercases s, deletes every ".." sequence,
 // normalises the result with path.Clean and hands it to cleanString together with illegalPath.
 // The result may still begin with / and is not meant to be used as a file system path
 // without a prefix. It may be empty; callers must check.
 func Path(s string) string {
 	lowered := strings.ToLower(s)
 	// Dropping ".." before cleaning keeps the path from climbing out of its root.
 	withoutParents := strings.Replace(lowered, "..", "", -1)
 	return cleanString(path.Clean(withoutParents), illegalPath)
 }
 // illegalName matches every character other than ASCII letters and digits, '-' and '.'.
 var illegalName = regexp.MustCompile(`[^[:alnum:]-.]`)
 // Name prepares s for use as a file name: it takes the last path element with path.Base,
 // normalises it with path.Clean and hands it to cleanString together with illegalName.
 // The case of s is left unchanged. The result may be empty; callers must check.
 func Name(s string) string {
 	base := path.Base(s)
 	return cleanString(path.Clean(base), illegalName)
 }
 // baseNameSeparators matches the characters BaseName turns into a dash: '.' and '/'.
 var baseNameSeparators = regexp.MustCompile(`[./]`)
 // BaseName prepares s for use as a file base name: every '.' or '/' becomes '-' and the
 // result is handed to cleanString together with illegalName.
 // No attempt is made to normalise a path or normalise case.
 // The result may be empty; callers must check.
 func BaseName(s string) string {
 	dashed := baseNameSeparators.ReplaceAllString(s, "-")
 	return cleanString(dashed, illegalName)
 }
 // transliterations is a very limited table that maps common european accented characters to
 // ascii replacements. Umlauts use the two-letter form in both cases (Ä/ä, Ö/ö, Ü/ü).
 // This set could be expanded with many more characters.
 var transliterations = map[rune]string{
 	'À': "A",
 	'Á': "A",
 	'Â': "A",
 	'Ã': "A",
 	'Ä': "AE",
 	'Å': "AA",
 	'Æ': "AE",
 	'Ç': "C",
 	'È': "E",
 	'É': "E",
 	'Ê': "E",
 	'Ë': "E",
 	'Ì': "I",
 	'Í': "I",
 	'Î': "I",
 	'Ï': "I",
 	'Ð': "D",
 	'Ł': "L",
 	'Ñ': "N",
 	'Ò': "O",
 	'Ó': "O",
 	'Ô': "O",
 	'Õ': "O",
 	'Ö': "OE",
 	'Ø': "OE",
 	'Œ': "OE",
 	'Ù': "U",
 	'Ú': "U",
 	'Ü': "UE",
 	'Û': "U",
 	'Ý': "Y",
 	'Þ': "TH",
 	'ẞ': "SS",
 	'à': "a",
 	'á': "a",
 	'â': "a",
 	'ã': "a",
 	'ä': "ae",
 	'å': "aa",
 	'æ': "ae",
 	'ç': "c",
 	'è': "e",
 	'é': "e",
 	'ê': "e",
 	'ë': "e",
 	'ì': "i",
 	'í': "i",
 	'î': "i",
 	'ï': "i",
 	'ð': "d",
 	'ł': "l",
 	'ñ': "n",
 	'ń': "n",
 	'ò': "o",
 	'ó': "o",
 	'ô': "o",
 	'õ': "o",
 	'ō': "o",
 	'ö': "oe",
 	'ø': "oe",
 	'œ': "oe",
 	'ś': "s",
 	'ù': "u",
 	'ú': "u",
 	'û': "u",
 	'ū': "u",
 	'ü': "ue",
 	'ý': "y",
 	'ÿ': "y",
 	'ż': "z",
 	'þ': "th",
 	'ß': "ss",
 }
 // Accents replaces a set of accented characters with ascii equivalents.
 // Every rune of s found in transliterations is replaced by its mapped string; all other
 // runes are copied through unchanged.
 func Accents(s string) string {
 	var b strings.Builder
 	// Most inputs contain few accents, so the input length is a good first estimate.
 	b.Grow(len(s))
 	for _, c := range s {
 		if replacement, ok := transliterations[c]; ok {
 			b.WriteString(replacement)
 			continue
 		}
 		b.WriteRune(c)
 	}
 	return b.String()
 }
 var (
 	// illegalAttr matches "data:" or "javascript:" anywhere in an attribute value, with
 	// optional whitespace between the letters and before the colon. Such values are rejected
 	// because they are so frequently used for xss. Callers match against a lowercased value.
 	illegalAttr = regexp.MustCompile(`(d\s*a\s*t\s*a|j\s*a\s*v\s*a\s*s\s*c\s*r\s*i\s*p\s*t\s*)\s*:`)
 	// legalHrefAttr is far more restrictive and applies to href values only: the value must
 	// START with "/", "#", "mailto:", "http://" or "https://". The alternation is grouped so
 	// that \A anchors every alternative, not just the first one; otherwise a value such as
 	// "vbscript:...//http://" would be accepted because "http://" appears somewhere inside it.
 	legalHrefAttr = regexp.MustCompile(`\A(?:[/#][^/\\]?|mailto:|http://|https://)`)
 )
 // cleanAttributes filters the attribute list a and returns the attributes that survive.
 // An attribute is kept only when its key is in allowed, its value is non-empty, the
 // lowercased value does not match illegalAttr and, for href, the lowercased value matches
 // legalHrefAttr. Kept attributes retain their original value.
 // An empty input is returned unchanged.
 func cleanAttributes(a []parser.Attribute, allowed []string) []parser.Attribute {
 	if len(a) == 0 {
 		return a
 	}
 	var kept []parser.Attribute
 	for i := range a {
 		candidate := a[i]
 		if !includes(allowed, candidate.Key) || candidate.Val == "" {
 			continue
 		}
 		// All pattern checks run against the lowercased value.
 		lowered := strings.ToLower(candidate.Val)
 		if illegalAttr.FindString(lowered) != "" {
 			continue
 		}
 		// href values must additionally look like /, #, mailto:, http:// or https:// links.
 		if candidate.Key == "href" && legalHrefAttr.FindString(lowered) == "" {
 			continue
 		}
 		kept = append(kept, candidate)
 	}
 	return kept
 }
 // separators matches the characters treated as separators in normal strings (space & _ = + :);
 // dashes matches any run of one or more '-'.
 // separators is only referenced by the disabled replacement step inside cleanString.
 var (
 	separators = regexp.MustCompile(`[ &_=+:]`)
 	dashes = regexp.MustCompile(`[\-]+`)
 )
 // cleanString trims leading and trailing spaces from s, flattens accents with Accents and
 // collapses every run of dashes into a single '-'.
 // The steps that replace separators with '-' and delete the characters matched by r are
 // currently commented out, so r is accepted but not used.
 func cleanString(s string, r *regexp.Regexp) string {
 	// Trim spaces first, then flatten accents so the name stays legible in ascii.
 	s = Accents(strings.Trim(s, " "))
 	// Disabled: replace certain joining characters with a dash.
 	//s = separators.ReplaceAllString(s, "-")
 	// Disabled: remove all characters matched by r.
 	//s = r.ReplaceAllString(s, "")
 	// Collapse repeated dashes into one.
 	return dashes.ReplaceAllString(s, "-")
 }
 // includes reports whether s is an element of a, using exact string comparison.
 func includes(a []string, s string) bool {
 	for i := 0; i < len(a); i++ {
 		if a[i] == s {
 			return true
 		}
 	}
 	return false
 }
--- a/internal/sanitize/sanitize_test.go
+++ b/internal/sanitize/sanitize_test.go
@ -0,0 +1,236 @@
 // Utility functions for working with text
 package sanitize
 import (
 	"testing"
 )
 // Format is the failure-message layout shared by the tests in this file: the input, the
 // expected output and the actual output, each printed with %q.
 var Format = "\ninput:    %q\nexpected: %q\noutput:   %q"
 // Test pairs an input string with the output a sanitizing function is expected to return for it.
 type Test struct {
 	input    string
 	expected string
 }
 // urls holds the input/expected pairs used by TestPath and BenchmarkPath.
 // NB the treatment of accents - they are removed and replaced with ascii transliterations
 // NOTE(review): several expectations assume that cleanString replaces separators with '-'
 // and strips illegal characters; while those steps are commented out in sanitize.go these
 // entries (for example "ReAd ME.md" -> "read-me.md") will not match - confirm intended behaviour.
 var urls = []Test{
 	{"ReAd ME.md", `read-me.md`},
 	{"E88E08A7-279C-4CC1-8B90-86DE0D7044_3C.html", `e88e08a7-279c-4cc1-8b90-86de0d7044-3c.html`},
 	{"/user/test/I am a long url's_-?ASDF@£$%£%^testé.html", `/user/test/i-am-a-long-urls-asdfteste.html`},
 	{"/../../4-icon.jpg", `/4-icon.jpg`},
 	{"/Images_dir/../4-icon.jpg", `/images-dir/4-icon.jpg`},
 	{"../4 icon.*", `/4-icon.`},
 	{"Spac ey/Nôm/test før url", `spac-ey/nom/test-foer-url`},
 	{"../*", `/`},
 }
 // TestPath runs Path over every entry in urls and stops at the first mismatch.
 func TestPath(t *testing.T) {
 	for i := range urls {
 		tc := urls[i]
 		if got := Path(tc.input); got != tc.expected {
 			t.Fatalf(Format, tc.input, tc.expected, got)
 		}
 	}
 }
 // BenchmarkPath measures Path over the urls table, verifying each result as it goes.
 func BenchmarkPath(b *testing.B) {
 	for iter := 0; iter < b.N; iter++ {
 		for j := range urls {
 			tc := urls[j]
 			if got := Path(tc.input); got != tc.expected {
 				b.Fatalf(Format, tc.input, tc.expected, got)
 			}
 		}
 	}
 }
 // fileNames holds the input/expected pairs used by TestName and BenchmarkName.
 // NOTE(review): most expectations assume lowercased output with separators replaced by '-'
 // and illegal characters stripped; Name no longer lowercases and cleanString has those steps
 // commented out, so entries such as "ReAd ME.md" -> "read-me.md" will not match - confirm
 // intended behaviour.
 var fileNames = []Test{
 	{"ReAd ME.md", `read-me.md`},
 	{"/var/etc/jobs/go/go/src/pkg/foo/bar.go", `bar.go`},
 	{"I am a long url's_-?ASDF@£$%£%^é.html", `i-am-a-long-urls-asdfe.html`},
 	{"/../../4-icon.jpg", `4-icon.jpg`},
 	{"/Images/../4-icon.jpg", `4-icon.jpg`},
 	{"../4 icon.jpg", `4-icon.jpg`},
 	{"../4 icon-testé *8%^\"'\".jpg ", `4-icon-teste-8.jpg`},
 	{"Überfluß an Döner macht schöner.JPEG", `ueberfluss-an-doener-macht-schoener.jpeg`},
 	{"Ä-_-Ü_:()_Ö-_-ä-_-ü-_-ö-_ß.webm", `ae-ue-oe-ae-ue-oe-ss.webm`},
 }
 // TestName runs Name over every entry in fileNames and stops at the first mismatch.
 func TestName(t *testing.T) {
 	for i := range fileNames {
 		tc := fileNames[i]
 		if got := Name(tc.input); got != tc.expected {
 			t.Fatalf(Format, tc.input, tc.expected, got)
 		}
 	}
 }
 // BenchmarkName measures Name over the fileNames table, verifying each result as it goes.
 func BenchmarkName(b *testing.B) {
 	for iter := 0; iter < b.N; iter++ {
 		for j := range fileNames {
 			tc := fileNames[j]
 			if got := Name(tc.input); got != tc.expected {
 				b.Fatalf(Format, tc.input, tc.expected, got)
 			}
 		}
 	}
 }
 // baseFileNames holds the input/expected pairs used by TestBaseName.
 // NOTE(review): the expectations assume that cleanString replaces separators such as space,
 // '&' and ':' with '-' and strips illegal characters; while those steps are commented out in
 // sanitize.go entries containing such characters will not match - confirm intended behaviour.
 var baseFileNames = []Test{
 	{"The power & the Glory jpg file. The end", `The-power-the-Glory-jpg-file-The-end`},
 	{"/../../4-iCoN.jpg", `-4-iCoN-jpg`},
 	{"And/Or", `And-Or`},
 	{"Sonic.EXE", `Sonic-EXE`},
 	{"012: #Fetch for Defaults", `012-Fetch-for-Defaults`},
 }
 // TestBaseName runs BaseName over every entry in baseFileNames and stops at the first mismatch.
 func TestBaseName(t *testing.T) {
 	for i := range baseFileNames {
 		tc := baseFileNames[i]
 		if got := BaseName(tc.input); got != tc.expected {
 			t.Fatalf(Format, tc.input, tc.expected, got)
 		}
 	}
 }
 // Test with some malformed or malicious html
 // NB because we remove all tokens after a < until the next >
 // and do not attempt to parse, we should be safe from invalid html,
 // but will sometimes completely empty the string if we have invalid input
 // Note we sometimes use " in order to keep things on one line and use the ` character
 var htmlTests = []Test{
 	{`&nbsp;`, " "},
 	{`&amp;#x000D;`, `&amp;#x000D;`},
 	{`<invalid attr="invalid"<,<p><p><p><p><p>`, ``},
 	{"<b><p>Bold </b> Not bold</p>\nAlso not bold.", "Bold  Not bold\nAlso not bold."},
 	{`FOO&#x000D;ZOO`, "FOO\rZOO"},
 	{`<script><!--<script </s`, ``},
 	{`<a href="/" alt="Fab.com | Aqua Paper Map 22"" title="Fab.com | Aqua Paper Map 22" - fab.com">test</a>`, `test`},
 	{`<p</p>?> or <p id=0</p> or <<</>><ASDF><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>`, ` or ***************aaaaaaaaaaaaaaaaaaaaaaaaaa`},
 	{`<p>Some text</p><frameset src="testing.html"></frameset>`, "Some text\n"},
 	{`Something<br/>Some more`, "Something\nSome more"},
 	{`<a href="http://www.example.com"?>This is a 'test' of <b>bold</b> &amp; <i>italic</i></a> <br/> invalid markup.<//data>><alert><script CDATA[:Asdfjk2354115nkjafdgs]>. <div src=">">><><img src="">`, "This is a 'test' of bold & italic \n invalid markup.. \""},
 	{`<![CDATA[<sender>John Smith</sender>]]>`, `John Smith]]`},
 	{`<!-- <script src='blah.js' data-rel='fsd'> --> This is text`, ` -- This is text`},
 	{`<style>body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}</style>`, `body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}`},
 	{`&lt;iframe src="" attr=""&gt;>>>>>`, `&lt;iframe src="" attr=""&gt;`},
 	{`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `alert("XSS")"`},
 	{`<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, ``},
 	{`<IMG SRC=JaVaScRiPt:alert('XSS')&gt;`, ``},
 	{`<IMG SRC="javascript:alert('XSS')" <test`, ``},
 	{`<a href="javascript:alert('XSS')" src="javascript:alert('XSS')" onclick="javascript:alert('XSS')"></a>`, ``},
 	{`&gt & test &lt`, `&gt; & test &lt;`},
 	{`<img></IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, ``},
 	{`&#8220;hello&#8221; it&#8217;s for &#8216;real&#8217;`, `"hello" it's for 'real'`},
 	{`<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&
 #0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>`, ``},
 	{`'';!--"<XSS>=&{()}`, `'';!--"=&amp;{()}`},
 	{"LINE 1<br />\nLINE 2", "LINE 1\nLINE 2"},
 	// Examples from https://githubengineering.com/githubs-post-csp-journey/
 	{`<img src='https://example.com/log_csrf?html=`, ``},
 	{`<img src='https://example.com/log_csrf?html=
 <form action="https://example.com/account/public_keys/19023812091023">
 ...
 <input type="hidden" name="csrf_token" value="some_csrf_token_value">
 </form>`, `...`},
 	{`<img src='https://example.com?d=https%3A%2F%2Fsome-evil-site.com%2Fimages%2Favatar.jpg%2f
 	<p>secret</p>`, `secret
 `},
 	{`<form action="https://some-evil-site.com"><button>Click</button><textarea name='
 <!-- </textarea> --><!-- '" -->
 <form action="/logout">
  <input name="authenticity_token" type="hidden" value="secret1">
 </form>`, `Click --  `},
 }
 // TestHTML runs HTML over every entry in htmlTests and stops at the first mismatch.
 func TestHTML(t *testing.T) {
 	for i := range htmlTests {
 		tc := htmlTests[i]
 		if got := HTML(tc.input); got != tc.expected {
 			t.Fatalf(Format, tc.input, tc.expected, got)
 		}
 	}
 }
 var htmlTestsAllowing = []Test{
 	{`<IMG SRC="jav&#x0D;ascript:alert('XSS');">`, `<img>`},
 	{`<i>hello world</i href="javascript:alert('hello world')">`, `<i>hello world</i>`},
 	{`hello<br ><br / ><hr /><hr    >rulers`, `hello<br><br><hr/><hr>rulers`},
 	{`<span class="testing" id="testid" name="testname" style="font-color:red;text-size:gigantic;"><p>Span</p></span>`, `<span class="testing" id="testid" name="testname"><p>Span</p></span>`},
 	{`<div class="divclass">Div</div><h4><h3>test</h4>invalid</h3><p>test</p>`, `<div class="divclass">Div</div><h4><h3>test</h4>invalid</h3><p>test</p>`},
 	{`<p>Some text</p><exotic><iframe>test</iframe><frameset src="testing.html"></frameset>`, `<p>Some text</p>`},
 	{`<b>hello world</b>`, `<b>hello world</b>`},
 	{`text<p>inside<p onclick='alert()'/>too`, `text<p>inside<p/>too`},
 	{`&amp;#x000D;`, `&amp;#x000D;`},
 	{`<invalid attr="invalid"<,<p><p><p><p><p>`, `<p><p><p><p>`},
 	{"<b><p>Bold </b> Not bold</p>\nAlso not bold.", "<b><p>Bold </b> Not bold</p>\nAlso not bold."},
 	{"`FOO&#x000D;ZOO", "`FOO&#13;ZOO"},
 	{`<script><!--<script </s`, ``},
 	{`<a href="/" alt="Fab.com | Aqua Paper Map 22"" title="Fab.com | Aqua Paper Map 22" - fab.com">test</a>`, `<a href="/" alt="Fab.com | Aqua Paper Map 22" title="Fab.com | Aqua Paper Map 22">test</a>`},
 	{"<p</p>?> or <p id=0</p> or <<</>><ASDF><@$!@£M<<>>>>>>>>>>>>>><>***************aaaaaaaaaaaaaaaaaaaaaaaaaa>", "?&gt; or <p id=\"0&lt;/p\"> or &lt;&lt;&gt;&lt;@$!@£M&lt;&lt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&gt;&lt;&gt;***************aaaaaaaaaaaaaaaaaaaaaaaaaa&gt;"},
 	{`<p>Some text</p><exotic><iframe><frameset src="testing.html"></frameset>`, `<p>Some text</p>`},
 	{"Something<br/>Some more", `Something<br/>Some more`},
 	{`<a href="http://www.example.com"?>This is a 'test' of <b>bold</b> &amp; <i>italic</i></a> <br/> invalid markup.</data><alert><script CDATA[:Asdfjk2354115nkjafdgs]>. <div src=">escape;inside script tag"><img src="">`, `<a href="http://www.example.com">This is a &#39;test&#39; of <b>bold</b> &amp; <i>italic</i></a> <br/> invalid markup.`},
 	{"<sender ignore=me>John Smith</sender>", `John Smith`},
 	{"<!-- <script src='blah.js' data-rel='fsd'> --> This is text", ` This is text`},
 	{"<style>body{background-image:url(http://www.google.com/intl/en/images/logo.gif);}</style>", ``},
 	{`&lt;iframe src="" attr=""&gt;`, `&lt;iframe src=&#34;&#34; attr=&#34;&#34;&gt;`},
 	{`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `<img>&#34;&gt;`},
 	{`<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, `<img>`},
 	{`<IMG SRC=JaVaScRiPt:alert('XSS')&gt;`, ``},
 	{`<IMG SRC="javascript:alert('XSS')">>> <test`, `<img>&gt;&gt; `},
 	{`&gt & test &lt`, `&gt; &amp; test &lt;`},
 	{`<img></IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>`, `<img></img>`},
 	{`<img src="data:text/javascript;alert('alert');">`, `<img>`},
 	{`<iframe src=http://... <`, ``},
 	{`<iframe src="data:CSS"><img><a><</a>;sdf<iframe>`, ``},
 	{`<img src=javascript:alert(document.cookie)>`, `<img>`},
 	{`<?php echo('hello world')>`, ``},
 	{`Hello <STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A>World`, `Hello <a class="XSS"></a>World`},
 	{`<a href="javascript:alert('XSS1')" onmouseover="alert('XSS2')">XSS<a>`, `<a>XSS<a>`},
 	{`<a href="http://www.google.com/"><img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/></a>`,
 		`<a href="http://www.google.com/"><img src="https://ssl.gstatic.com/accounts/ui/logo_2x.png"/></a>`},
 	{`<a href="javascript:alert(&#39;XSS1&#39;)" "document.write('<HTML> Tags and markup');">XSS<a>`, `<a> Tags and markup&#39;);&#34;&gt;XSS<a>`},
 	{`<a <script>document.write("UNTRUSTED INPUT: " + document.location.hash);<script/> >`, `<a>document.write(&#34;UNTRUSTED INPUT: &#34; + document.location.hash); &gt;`},
 	{`<a href="#anchor">foo</a>`, `<a href="#anchor">foo</a>`},
 	{`<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>`, `<img>`},
 	{`<IMG SRC="jav	ascript:alert('XSS');">`, `<img>`},
 	{`<IMG SRC="jav&#x09;ascript:alert('XSS');">`, `<img>`},
 	{`<HEAD><META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=UTF-7"> </HEAD>+ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4-`, ` +ADw-SCRIPT+AD4-alert(&#39;XSS&#39;);+ADw-/SCRIPT+AD4-`},
 	{`<SCRIPT>document.write("<SCRI");</SCRIPT>PT SRC="http://ha.ckers.org/xss.js"></SCRIPT>`, `PT SRC=&#34;http://ha.ckers.org/xss.js&#34;&gt;`},
 	{`<a href="javascript:alert('XSS')" src="javascript:alert('XSS')" onclick="javascript:alert('XSS')"></a>`, `<a></a>`},
 	{`'';!--"<XSS>=&{()}`, `&#39;&#39;;!--&#34;=&amp;{()}`},
 	{`<IMG SRC=javascript:alert('XSS')`, ``},
 	{`<IMG """><SCRIPT>alert("XSS")</SCRIPT>">`, `<img>&#34;&gt;`},
 	{`<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&
 #0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>`, `<img>`},
 	{`<a href="mailto:cool@test.com?subject=cooool">cool guy</a>`, `<a href="mailto:cool@test.com?subject=cooool">cool guy</a>`},
 }
 // TestHTMLAllowed runs HTMLAllowing with the default whitelists over every entry in
 // htmlTestsAllowing and stops at the first error or mismatch.
 func TestHTMLAllowed(t *testing.T) {
 	for _, test := range htmlTestsAllowing {
 		output, err := HTMLAllowing(test.input)
 		if err != nil {
 			// Format has only three verbs, so the error needs its own verb to be printed properly.
 			t.Fatalf(Format+"\nerror:    %v", test.input, test.expected, output, err)
 		}
 		if output != test.expected {
 			t.Fatalf(Format, test.input, test.expected, output)
 		}
 	}
 }
 // BenchmarkHTMLAllowed measures HTMLAllowing with the default whitelists over the
 // htmlTestsAllowing table, verifying each result as it goes.
 func BenchmarkHTMLAllowed(b *testing.B) {
 	for i := 0; i < b.N; i++ {
 		for _, test := range htmlTestsAllowing {
 			output, err := HTMLAllowing(test.input)
 			if err != nil {
 				// Format has only three verbs, so the error needs its own verb to be printed properly.
 				b.Fatalf(Format+"\nerror:    %v", test.input, test.expected, output, err)
 			}
 			if output != test.expected {
 				b.Fatalf(Format, test.input, test.expected, output)
 			}
 		}
 	}
 }
--- a/service/fileService.go
+++ b/service/fileService.go
@ -15,6 +15,7 @@ import (
 	"strconv"
 	"time"
 	"github.com/akhilrex/podgrab/internal/sanitize"
 	stringy "github.com/gobeam/stringy"
 )
@ -185,8 +186,8 @@ func httpClient() *http.Client {
 }
 func createFolder(folder string, parent string) string {
-	str := stringy.New(folder)
+	folder = cleanFileName(folder)
-	folder = str.RemoveSpecialCharacter()
+	//str := stringy.New(folder)
 	folderPath := path.Join(parent, folder)
 	if _, err := os.Stat(folderPath); os.IsNotExist(err) {
 		os.MkdirAll(folderPath, 0777)
@ -197,11 +198,11 @@ func createFolder(folder string, parent string) string {
 func createDataFolderIfNotExists(folder string) string {
 	dataPath := os.Getenv("DATA")
-	return createFolder(folder,dataPath)
+	return createFolder(folder, dataPath)
 }
 func createConfigFolderIfNotExists(folder string) string {
 	dataPath := os.Getenv("CONFIG")
-	return createFolder(folder,dataPath)
+	return createFolder(folder, dataPath)
 }
 func getFileName(link string, title string, defaultExtension string) string {
@ -214,12 +215,16 @@ func getFileName(link string, title string, defaultExtension string) string {
 	if len(ext) == 0 {
 		ext = defaultExtension
 	}
-	str := stringy.New(title)
+	//str := stringy.New(title)
-	str = stringy.New(str.RemoveSpecialCharacter())
+	str := stringy.New(cleanFileName(title))
 	return str.KebabCase().Get() + ext
 }
 // cleanFileName returns original passed through sanitize.Name, so folder and file names are
 // derived from one shared sanitizing routine.
 func cleanFileName(original string) string {
 	return sanitize.Name(original)
 }
 func checkError(err error) {
 	if err != nil {
 		panic(err)