Classe para obter as palavras que um visitante digitou em um site de busca, a partir da URL desse site de busca (normalmente o referer). Basta usar o método estático getWords.
Linguagem: PHP
Copyright 2010 Rubens Takiguti Ribeiro
Licença: LGPL 3 ou superior
/**
 * Class SearchWords
 *
 * Extracts the key-words a visitor typed into a search site from the URL of
 * that search site (typically the HTTP referer). The search site is
 * recognised by the main label of its host name (e.g. "google" in
 * "www.google.com.br"); the words are then read either from a known
 * query-string parameter or from the URL path.
 *
 * @author Rubens Takiguti Ribeiro
 */
class SearchWords
{
    /**
     * Gets key-words used in search sites by its url.
     *
     * An empty string is returned whenever the words cannot be determined:
     * no host in the URL, unrecognised search site, or missing parameter.
     *
     * @param string $referer_url URL to be checked
     * @return string Words (empty string when none could be extracted)
     * @throws InvalidArgumentException When parse_url() rejects the URL
     */
    public static function getWords($referer_url)
    {
        $url_data = parse_url($referer_url);
        if ($url_data === false) {
            throw new InvalidArgumentException('Invalid url: '.$referer_url, 1);
        }
        if (!isset($url_data['host'])) {
            return '';
        }

        $host_data = self::parseHost($url_data['host']);

        // parseHost() only sets main_domain when the host ends with a known
        // country/purpose suffix (it is absent for "localhost", for example).
        if (!isset($host_data['main_domain'])) {
            return '';
        }
        $main_domain = $host_data['main_domain'];

        // Sites that carry the words in a query-string parameter
        $param_map = self::getQueryParamMap();
        if (isset($param_map[$main_domain])) {
            $param = $param_map[$main_domain];
            $query = isset($url_data['query']) ? $url_data['query'] : '';
            $words = self::getQueryParam($param, $query);

            // Some result pages put the parameters after "#" instead of "?"
            // (e.g. http://www.google.com.br/#hl=pt-BR&q=...), so the
            // fragment is used as a fallback.
            if ($words === '' && isset($url_data['fragment'])) {
                $words = self::getQueryParam($param, $url_data['fragment']);
            }
            return $words;
        }

        // Sites that carry the words in the path
        $path = isset($url_data['path']) ? $url_data['path'] : '';
        switch ($main_domain) {
            case 'omgili':
            case 'eurekster':
            case 'wink':
            case 'miner':
                return urldecode($path);

            case 'wikipedia':
                // The words are the last path segment. rawurldecode() is used
                // so that a literal "+" in the segment is kept as "+".
                $slash = strrpos($path, '/');
                $last = ($slash === false) ? $path : substr($path, $slash + 1);
                return rawurldecode((string) $last);

            case 'excite':
                // The words are the path segment that follows "Web"
                $segments = explode('/', $path);
                $pos = array_search('Web', $segments, true);
                if ($pos !== false && isset($segments[$pos + 1])) {
                    return urldecode($segments[$pos + 1]);
                }
                return '';
        }

        // Not a recognised search site
        return '';
    }

    /**
     * Return the query-string parameter that holds the searched words for
     * each supported search site.
     *
     * @return array[string => string] Main domain => parameter name
     */
    private static function getQueryParamMap()
    {
        static $map = null;

        if ($map === null) {
            // Parameter name => main domains that use it
            $groups = array(
                'q' => array(
                    'google', 'bing', 'altavista', 'aol', 'galaxy',
                    'dibdabdoo', 'gigablast', 'alexa', 'blogscope',
                    'icerocket', 'sphere', 'technorati', 'freebooksearch',
                    'exalead', 'nstein', 'oracle', 'cheatsearch', 'mahalo',
                    'rollyo', 'trexy', 'accoona', 'alleba', 'ansearch',
                    'daum', 'guruji', 'najdi', 'sapo', 'search', 'walla'
                ),
                'p'                 => array('yahoo'),
                's'                 => array('email-search'),
                'qt'                => array('saic', 'onet'),
                'wd'                => array('baidu'),
                'MT'                => array('goo', 'rediff'),
                'query'             => array(
                    'hotbot', 'lycos', 'autonomy', 'funnelback', 'vivisimo',
                    'naver', 'rambler'
                ),
                'keywords'          => array('kidsclick'),
                'Keywords'          => array('askmenow'),
                's_keyword'         => array('souq'),
                'search'            => array('ifac'),
                'SearchText'        => array('alibaba'),
                'searchq'           => array('mymcpl'),
                'search_query'      => array('youtube'),
                'tsearch'           => array('blogperfect'),
                'simplequerystring' => array('dieselpoint'),
                'text'              => array('yandex'),
                'terms'             => array('awesomelibrary'),
                'Nrt'               => array('endeca')
            );

            $map = array();
            foreach ($groups as $param => $domains) {
                foreach ($domains as $domain) {
                    $map[$domain] = $param;
                }
            }
        }

        return $map;
    }

    /**
     * Return a parameter value of a query string.
     *
     * @param string $param Parameter to be get
     * @param string $query Query string (without the leading "?")
     * @return string Parameter value, or an empty string when the parameter
     *                is absent or is not a plain string (e.g. "q[]=x")
     */
    private static function getQueryParam($param, $query)
    {
        if (!is_string($query) || $query === '') {
            return '';
        }

        parse_str($query, $query_data);
        if (isset($query_data[$param]) && is_string($query_data[$param])) {
            return $query_data[$param];
        }
        return '';
    }

    /**
     * Gets host informations.
     *
     * The host is lower-cased and a trailing dot is removed before it is
     * split, so the returned values are always lower-case.
     *
     * @param string $host Host to be checked
     * @return array[string => string] Associative array with domain data.
     * Potential keys are:
     * - country     (last label, when it is a known country domain)
     * - propose     (generic label such as "com", when present)
     * - main_domain (label that precedes the suffix; only set when the host
     *                ends with a known country and/or generic label)
     * - sub_domain  (remaining labels joined with ".")
     */
    public static function parseHost($host)
    {
        $data = array();
        $country_domain = self::getCountryDomain();
        $propose_domain = self::getProposeDomain();

        // Host names are case-insensitive and may carry a trailing root dot
        $host_domains = explode('.', strtolower(rtrim((string) $host, '.')));

        // Labels are consumed from right to left
        $domain = array_pop($host_domains);
        if (in_array($domain, $country_domain, true)) {
            $data['country'] = $domain;
            $domain = array_pop($host_domains);
            if (in_array($domain, $propose_domain, true)) {
                $data['propose'] = $domain;
                $domain = array_pop($host_domains);
            }
            $data['main_domain'] = $domain;
        } elseif (in_array($domain, $propose_domain, true)) {
            $data['propose'] = $domain;
            $data['main_domain'] = array_pop($host_domains);
        }

        if (!empty($host_domains)) {
            $data['sub_domain'] = implode('.', $host_domains);
        }
        return $data;
    }

    /**
     * Return an array of generic proposed domains
     *
     * @return array[string]
     */
    public static function getProposeDomain()
    {
        return array(
            'aero', 'asia', 'biz', 'cat', 'co', 'com', 'coop', 'edu', 'gov',
            'info', 'int', 'jobs', 'mil', 'mobi', 'museum', 'name', 'net',
            'org', 'pro', 'tel', 'travel'
        );
    }

    /**
     * Return an array of country domains
     *
     * @return array[string]
     */
    public static function getCountryDomain()
    {
        return array(
            'ac', 'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq',
            'ar', 'as', 'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be',
            'bf', 'bg', 'bh', 'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt',
            'bv', 'bw', 'by', 'bz', 'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci',
            'ck', 'cl', 'cm', 'cn', 'co', 'cr', 'cu', 'cv', 'cx', 'cy', 'cz',
            'de', 'dj', 'dk', 'dm', 'do', 'dz', 'ec', 'ee', 'eg', 'er', 'es',
            'et', 'eu', 'fi', 'fj', 'fk', 'fm', 'fo', 'fr', 'ga', 'gb', 'gd',
            'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm', 'gn', 'gp', 'gq', 'gr',
            'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn', 'hr', 'ht', 'hu',
            'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is', 'it', 'je',
            'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp', 'kr',
            'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
            'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml',
            'mm', 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw',
            'mx', 'my', 'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no',
            'np', 'nr', 'nu', 'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk',
            'pl', 'pm', 'pn', 'pr', 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro',
            'rs', 'ru', 'rw', 'sa', 'sb', 'sc', 'sd', 'se', 'sg', 'sh', 'si',
            'sj', 'sk', 'sl', 'sm', 'sn', 'so', 'sr', 'st', 'su', 'sv', 'sy',
            'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk', 'tl', 'tm', 'tn',
            'to', 'tp', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'uk', 'us',
            'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
            'ye', 'yt', 'za', 'zm', 'zw'
        );
    }
}
Exemplo de uso:
// Words used to reach the current site. The Referer header is optional, so
// an empty URL is used when the browser did not send one (getWords() returns
// an empty string for a URL without a host).
$url = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
$words = SearchWords::getWords($url);

// Words used in a given link
$url = 'http://www.google.com.br/#hl=pt-BR&source=hp&biw=1269&bih=567&q=Rubens+Takiguti+Ribeiro&aq=f&aqi=&aql=&oq=&gs_rfai=&fp=45e6118c2d7b8c30';
$words = SearchWords::getWords($url);
0 comentários
Postar um comentário
Nota: fique à vontade para expressar o que achou deste artigo ou do blog.
Dica: para acompanhar as respostas, acesse com uma conta do Google e marque a opção "Notifique-me".
Atenção: o Blogger não permite a inclusão de tags nos comentários; por isso, use algum site externo para postar seu código com dúvidas e deixe o link aqui. Exemplo: pastebin.com