| tero.co.uk | |
|
|
Remove Accents from URLs

The function below can be used to remove accented characters from URLs by converting them into their non-accented equivalents. The function's source contains no accented characters itself (every accented byte is written with chr()), so it is easy to copy, paste and FTP without encoding problems. It only handles lower-case letters.
/**
 * Replace lower-case accented letters with their plain ASCII equivalents.
 *
 * Each accented letter is listed twice where possible: once as its two-byte
 * UTF-8 sequence (chr(195).chr(N) / chr(197).chr(N)) and once as its
 * single-byte Latin-1 value, so input in either encoding is handled.
 * strtr() with an array always tries the longest key first, so a two-byte
 * UTF-8 sequence is never mis-read as a Latin-1 byte.
 *
 * Only lower-case letters are mapped; upper-case accented letters pass
 * through unchanged.
 *
 * @param string $s Text that may contain accented characters.
 * @return string The text with the mapped characters replaced.
 */
function ReplaceAccents ($s) {
    $a = array (
        chr(195).chr(167)=>'c', //c with cedilla
        chr(231)=>'c',
        chr(195).chr(166)=>'ae', //a and e next to each other
        chr(230)=>'ae',
        chr(197).chr(147)=>'oe', //o and e next to each other (no Latin-1 form)
        chr(195).chr(161)=>'a', //a acute (small slash from bottom left)
        chr(225)=>'a',
        chr(195).chr(169)=>'e', //e acute
        chr(233)=>'e',
        chr(195).chr(173)=>'i', //i acute
        chr(237)=>'i',
        chr(195).chr(179)=>'o', //o acute
        chr(243)=>'o',
        chr(195).chr(186)=>'u', //u acute
        chr(250)=>'u',
        chr(195).chr(160)=>'a', //a grave (small slash from top left)
        chr(224)=>'a',
        chr(195).chr(168)=>'e', //e grave
        chr(232)=>'e',
        chr(195).chr(172)=>'i', //i grave
        chr(236)=>'i',
        chr(195).chr(178)=>'o', //o grave
        chr(242)=>'o',
        chr(195).chr(185)=>'u', //u grave
        chr(249)=>'u',
        chr(195).chr(164)=>'a', //a umlaut (two dots)
        chr(228)=>'a',
        chr(195).chr(171)=>'e', //e umlaut
        chr(235)=>'e',
        chr(195).chr(175)=>'i', //i umlaut
        chr(239)=>'i',
        chr(195).chr(182)=>'o', //o umlaut
        chr(246)=>'o',
        chr(195).chr(188)=>'u', //u umlaut
        chr(252)=>'u',
        chr(195).chr(191)=>'y', //y umlaut
        chr(255)=>'y', //was mapped to 'u' by mistake; must match the UTF-8 entry above
        chr(195).chr(162)=>'a', //a circumflex (a little hat)
        chr(226)=>'a',
        chr(195).chr(170)=>'e', //e circumflex
        chr(234)=>'e',
        chr(195).chr(174)=>'i', //i circumflex
        chr(238)=>'i',
        chr(195).chr(180)=>'o', //o circumflex
        chr(244)=>'o',
        chr(195).chr(187)=>'u', //u circumflex
        chr(251)=>'u',
        chr(195).chr(165)=>'a', //a with a small ring on top
        chr(229)=>'a',
        //NOTE(review): the three "letter + NUL byte" entries below drop a
        //trailing chr(0); presumably residue of a two-byte encoding - confirm.
        chr(101).chr(0)=>'e', //e
        chr(105).chr(0)=>'i', //i
        chr(195).chr(184)=>'o', //o with a slash through it
        chr(248)=>'o',
        chr(117).chr(0)=>'u', //u
    );
    return strtr ($s, $a);
}
The RemoveHtmlEntities function below converts the HTML entities in a string back into plain characters, and GetStringAsUrl takes a string and returns a version of it suitable for use in a URL. This is useful if you are using URL rewriting to turn a page title into the URL.
/**
 * Convert the HTML entities in a string back into plain characters.
 *
 * Named entities are translated with the inverse of PHP's HTML_ENTITIES
 * translation table, except that &nbsp; becomes an ordinary space.
 * Decimal numeric entities (&#NNN;) are then turned into the single byte
 * chr(NNN); values above 255 wrap modulo 256, as chr() has always done.
 *
 * @param string $string Text that may contain HTML entities.
 * @return string The text with entities decoded.
 */
function RemoveHtmlEntities ($string) {
    //entity => character map (the stock table is character => entity)
    $trans_tbl = array_flip (get_html_translation_table (HTML_ENTITIES));
    $trans_tbl['&nbsp;'] = ' '; //make nbsp's into normal spaces
    $r = strtr ($string, $trans_tbl); //translate named entities

    //Replace remaining decimal numeric entities. A callback is used because
    //the /e (eval) modifier no longer exists in PHP 7+, and the pattern is
    //anchored on "&#" so ordinary "digits;" text is left alone.
    return preg_replace_callback (
        '/&#(\d+);/',
        function ($m) {
            return chr (((int) $m[1]) % 256);
        },
        $r
    );
}
/**
 * Turn arbitrary text (e.g. a page title) into a URL-friendly slug.
 *
 * Steps: strip tags, decode HTML entities, lower-case, replace accented
 * letters, drop apostrophes, turn every remaining character that is not a
 * word character or listed in $leave into "-", collapse runs of "-" / "_"
 * into a single "-", and trim "-" from both ends.
 *
 * @param string $s     Text to convert.
 * @param string $leave Characters to keep as-is in addition to word
 *                      characters (default "/"). Treated literally.
 * @return string The slug.
 */
function GetStringAsUrl ($s, $leave='/') {
    //remove tags, decode entities, make lower case, replace accents
    $s = ReplaceAccents (strtolower (RemoveHtmlEntities (strip_tags ($s))));
    //decoding entities such as &lt;b&gt; can produce new tags, so strip again
    //(the text is already lower case at this point)
    $s = strip_tags ($s);

    $s = str_replace (" $leave ", $leave, $s); //"a / b" => "a/b"
    $s = str_replace ("'", '', $s); //apostrophes vanish rather than become "-"

    //Escape $leave so characters like "~", "]" or "\" cannot break the
    //character class or terminate the "~"-delimited pattern.
    $keep = preg_quote ($leave, '~');
    $s = preg_replace ('~[^\w\d' . $keep . ']~', '-', $s); //replace with -

    //collapse runs of - and _, then trim - from the end and the start
    return preg_replace (
        array ('/[-_]+/', '/-+$/', '/^-+/'),
        array ('-', '', ''),
        $s
    );
}
For example, if you run this:
//Example: slug for a page title containing an accented letter and punctuation.
echo GetStringAsUrl ('/pages/Great stuff from the Pâtisserie (at low prices)');
You'll get this: /pages/great-stuff-from-the-patisserie-at-low-prices |