TightURL

TightURL Git Source Tree

Root/tighturl.php

1<?php
2/**
3 * TightURL :: A blind redirection service
4 *
5 * Copyright (c) 2004-2008, Ron Guerin <ron@vnetworx.net>
6 * portions Copyright (c) 2002,2003 Free Software Foundation
7 *
8 * This file implements a blind redirection service named TightURL.
9 * TightURL is Free Software; you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * TightURL is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
17 * See the GNU General Public License for more details.
18 *
19 * If you are not able to view the LICENSE, which should
20 * always be possible within a valid and working TightURL release,
21 * please write to the Free Software Foundation, Inc.,
22 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * to get a copy of the GNU General Public License or to report a
24 * possible license violation.
25 *
26 * @package TightURL
27 * @author Ron Guerin <ron@vnetworx.net>
28 * @license http://www.fsf.org/licenses/gpl.html GNU Public License
29 * @copyright Copyright &copy; 2004-2009 Ron Guerin
30 * @filesource
31 * @link http://tighturl.com TightURL
32 * @version 0.1.4
33 *
34 */
35
define('VERSION', '0.1.4');
define('REQUIRED_PHP_VERSION', '4.3.0');

// System defaults, DO NOT EDIT THIS FILE
// Edit tighturl.config.inc.php instead!

global $copyright, $conn, $db, $os, $svcname;

// Database connection settings and the table holding the shortened URLs
$dbhost  = 'localhost';
$dbuser  = 'dbuser';
$dbpass  = 'dbpass';
$dbname  = 'tighturl';
$dbtable = 'urls';

// false = full URL path or mod_rewrite ("?i=" links), true = 404-Method compressed URLs
$FOFMethod = false;

// Operating system family; filled in during startup
$os = '';

// URIBL variables: DNS zones to query, and the site linked for each zone (same index)
$uribl    = array('multi.surbl.org', 'black.uribl.com');
$uriblurl = array('www.surbl.org', 'www.uribl.com');

// Bad Behavior variables
$BB2                   = true;
$BBstats               = true;
$BBstrict              = false;
$BBverbose             = true;
$BBLogging             = true;
$bb2_settings_defaults = '';

// Require submitted URLs to exist?
$mustexist = true;

// Text strings and style variables
$svcname         = 'URLSquisher';
$verbtext        = 'Squish';
$pasttext        = 'Squished';
$tagline         = 'Squish long URLs to make short ones';
$headcolor       = '#006600';
$tablecolor      = '#00CC99';
$copystart       = date('Y');
$copyrightholder = 'SquishURL Enterprises';

// Reserved URLs: base-36 IDs that are never handed out as short links
$ReservedURL = array(
  'x', 'rest', 'xmlrpc', 'soap', 'xml', 'atom', 'rss', 'blog',
  'faq', 'help', 'about', 'api', 'code', 'source', 'docs',
  'git', 'cvs', 'arch', 'url', 'admin', 'setup', 'svn', 'project',
  'abuse', 'cgi-sys', 'exploited'
);
82
83// You REALLY don't want to edit below here unless you know what you're doing.
84
85// *************************************************************************
86
// Refuse to run on an interpreter older than the declared minimum.
if (version_compare(phpversion(), REQUIRED_PHP_VERSION) < 0) {
  die_HTML($svcname, "Error: TightURL ".VERSION." needs PHP >= ".REQUIRED_PHP_VERSION." (you are using ".phpversion().")");
}

// The installer must be removed before the service will answer requests.
if (file_exists("tighturl-install.php")) die_HTML($svcname, "Error: You must remove tighturl-install.php before using $svcname.");

// Only the first three letters identify Windows ("WINNT", "WIN32", "Windows").
// Searching for "win" anywhere in the name also matched "Darwin" (macOS).
$os = (strtoupper(substr(PHP_OS, 0, 3)) === "WIN") ? "win" : "nix";

// Everything after the scheme of an acceptable URL: "://", optional
// user[:password]@, a host (dotted-quad IPv4, the IPv6 alternatives,
// "localhost", or a dotted name ending in a listed or two-letter TLD),
// optional port(s) and path segments.
// It is used as "/^" . $validschemes . $validurlpattern . "$/", so the scheme
// alternatives themselves come from $validschemes.
$validurlpattern = "\:\/\/([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)"
 . "*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])"
 . "\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
 . "\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
 . "\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])"
 . "|((([0-9A-F]{1,4}(((:[0-9A-F]{1,4}){5}::[0-9A-F]{1,4})|((:[0-9A-F]{1,4}){4}"
 . "::[0-9A-F]{1,4}(:[0-9A-F]{1,4}){0,1})|((:[0-9A-F]{1,4}){3}::[0-9A-F]{1,4}"
 . "(:[0-9A-F]{1,4}){0,2})|((:[0-9A-F]{1,4}){2}::[0-9A-F]{1,4}(:[0-9A-F]{1,4})"
 . "{0,3})|(:[0-9A-F]{1,4}::[0-9A-F]{1,4}(:[0-9A-F]{1,4}){0,4})|(::[0-9A-F]{1,4}"
 . "(:[0-9A-F]{1,4}){0,5})|(:[0-9A-F]{1,4}){7}))|(::[0-9A-F]{1,4}(:[0-9A-F]{1,4}"
 . "){0,6}))|::)|((([0-9A-F]{1,4}(((:[0-9A-F]{1,4}){3}::([0-9A-F]{1,4}){1})"
 . "|((:[0-9A-F]{1,4}){2}::[0-9A-F]{1,4}(:[0-9A-F]{1,4}){0,1})|((:[0-9A-F]{1,4})"
 . "{1}::[0-9A-F]{1,4}(:[0-9A-F]{1,4}){0,2})|(::[0-9A-F]{1,4}(:[0-9A-F]{1,4}"
 . "){0,3})|((:[0-9A-F]{1,4}){0,5})))|([:]{2}[0-9A-F]{1,4}(:[0-9A-F]{1,4}){0,4}))"
 . ":|::)((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{0,2})\.){3}(25[0-5]|2[0-4][0-9]|"
 . "[0-1]?[0-9]{0,2})"
 . "|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org"
 . "|mobi|biz|arpa|info|name|pro|aero|coop|museum"
 . "|[a-zA-Z]{2}))(\:[0-9]+)*(\/.($|[a-zA-Z0-9\.\:\,\?\'\(\)\\\*\+&%\$;|#\=~_\-\s@]*))*\/*";

// "://" followed by a dotted-quad IPv4 address; each quad is its own capture
// group so the blacklist lookup can reverse them.
$validipv4pattern = ":\/\/(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\."
 . "(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])"
 . "\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\/*";

// File extensions that are refused when a submitted URL ends with them.
$forbid = "\.(cmd|bat|exe|scr|pif|vbs|js|pif|msi|cdr)";
120
121// ****** !All overridable configuration variables must go above this line! ******
122
// Fall back to the safe setting for any switch that is still undefined.
if (! isset($antiabuse)) $antiabuse = true;
if (! isset($netchecks)) $netchecks = true;
if (! isset($mustexist)) $mustexist = true;

// Status values acted on by lookup_ID(): 3=Blacklist, 4=Policy, 5=Complaints.
// PolicyBan_ID() writes 4. Any other status is redirected normally.

// Optional pattern files first, site configuration last so that it can
// override anything set above or by the pattern files.
$optionalincludes = array(
  "tighturl.urlpattern.inc.php",
  "tighturl.tltpattern.inc.php",
  "tighturl.redirpattern.inc.php",
  "tighturl.ptcpattern.inc.php",
  "tighturl.blpattern.inc.php",
  "tighturl.config.inc.php",
);
foreach ($optionalincludes as $optionalinclude) {
  if (file_exists($optionalinclude)) include($optionalinclude);
}

// Figure out our copyright string: "YYYY" or "YYYY-YYYY"
$thisyear = date("Y");
$copyright = $copystart;
if ($copystart != $thisyear) $copyright .= "-" . $thisyear;

// Build the HTML list of blacklists for the $URIBLS template variable.
// A zone without a matching entry in $uriblurl is listed as plain text
// instead of reading an undefined array index.
$uriblitems = array();
for ($i=0; $i<count($uribl); $i++) {
  if (isset($uriblurl[$i])) {
    $uriblitems[] = "<a href='http://" . $uriblurl[$i] . "'>" . $uribl[$i] . "</a>";
  }
  else {
    $uriblitems[] = $uribl[$i];
  }
}
$uribls = implode(", ", $uriblitems);

// The parameter tag is only needed without the 404-Method; define it in both
// cases so display_HTML() never substitutes an undefined variable.
$parm = $FOFMethod ? "" : "?i=";

// Figure out correct self
if (strncmp($_SERVER['PHP_SELF'], $_SERVER['REQUEST_URI'], strlen($_SERVER['PHP_SELF'])) != 0) {
  // Script name is not a prefix of the request: use the script's directory.
  $self = preg_match("|(.*)/.*$|", $_SERVER['PHP_SELF'], $matches) ? $matches[1] : "";
}
else {
  $self = $_SERVER['PHP_SELF']; // We need the script name
}
if (! preg_match("|.*/$|", $self)) $self .= "/";

// Connect to MySQL, open database.
$conn = @mysql_connect($dbhost, $dbuser, $dbpass) or die_HTML($svcname, "Error: Cannot connect to database.");
$db = mysql_select_db($dbname, $conn) or die_HTML($svcname, "Error: Cannot select database. ". mysql_error());

// When in doubt, turn Bad Behavior on, set it to FALSE in the config to turn it off.
if (! isset($BB2)) $BB2 = true;

// If user has not turned off Bad Behavior in the config, use BB2 (highly recommended)
// TODO: warn user if $BB2 but not BB2
if ($BB2 && file_exists("bad-behavior/bad-behavior-tighturl.php")) {
  require_once("bad-behavior/bad-behavior-tighturl.php");
}
else {
  // Either switched off or not installed: display_HTML() must not call bb2_*().
  $BB2 = FALSE;
}
168
// Figure out what kind of request this is and service it.

// Request parameters are only used when they are plain strings; array-valued
// parameters (e.g. "?i[]=x") would otherwise reach trim()/base_convert().
$req_save = (isset($_REQUEST['save']) && is_string($_REQUEST['save'])) ? $_REQUEST['save'] : "";
$req_url = (isset($_REQUEST['url']) && is_string($_REQUEST['url'])) ? $_REQUEST['url'] : "";
$req_id = (isset($_REQUEST['i']) && is_string($_REQUEST['i'])) ? $_REQUEST['i'] : "";
$req_ban = (isset($pbi) && !empty($pbi) && isset($_REQUEST[$pbi]) && is_string($_REQUEST[$pbi])) ? $_REQUEST[$pbi] : "";
$query = isset($_SERVER['QUERY_STRING']) ? $_SERVER['QUERY_STRING'] : "";
$uri = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : "/";

// The target is taken from the raw query string so that an unencoded URL
// keeps its own "&" parameters. Match the FIRST "url=" parameter: a greedy
// ".*url=" started at the last one and truncated targets such as
// "http://host/?url=x" to just "x".
if ($req_save == 'y'
 && !empty($req_url) && trim($req_url) != ""
 && preg_match("/^(?:.*?&)?url=(.*)$/", $query, $matches)) {
  $url = trim($matches[1]);
  // Drop our own trailing form field, if present.
  if (preg_match("/^(.*)&tighturlaction.*$/", $matches[1], $matches)) $url = $matches[1];
  save_URL(urldecode($url));
}
elseif (!empty($req_id)) {
  lookup_ID($req_id);
}
elseif (!empty($req_ban)) {
  PolicyBan_ID($req_ban);
}
elseif ($FOFMethod && preg_match("/^\/+([a-zA-Z0-9]+)\/*(.*)\/*$/", $uri, $matches)) {
  lookup_ID($matches[1]);
}
elseif ($FOFMethod && $uri != "/") {
  display_HTML("", "", "Error: Couldn't find a valid " . $svcname . " URI.");
}
else {
  display_HTML("", "main");
}
exit;
196
197// *************************************************************************
198
199
/**
 * Escape a string for use inside a quoted SQL literal.
 *
 * Thin wrapper around mysql_real_escape_string(), called without an explicit
 * link identifier.
 *
 * @param string $string Raw value.
 * @return string Whatever mysql_real_escape_string() returns for $string.
 */
function sanitize_sql_string($string) {
  $escaped = mysql_real_escape_string($string);
  return $escaped;
}
206
/**
 * Counts the number of times a substring is contained in a given string.
 *
 * Occurrences may overlap: the search resumes one byte after the start of
 * each hit, so countSubstrs("aaa", "aa") is 2.
 *
 * @param string $haystack String to search in.
 * @param string $needle   Substring to look for.
 * @return int Number of occurrences; 0 when $needle is empty.
 */
function countSubstrs($haystack, $needle) {
  $haystack = (string) $haystack;
  $needle = (string) $needle;

  // An empty needle is found at every offset on PHP 8, which made the old
  // recursive version recurse without end.
  if ($needle === "") return 0;

  $count = 0;
  $offset = 0;
  $length = strlen($haystack);
  // Iterative scan: no recursion depth proportional to the number of hits.
  while ($offset < $length && ($pos = strpos($haystack, $needle, $offset)) !== false) {
    $count++;
    $offset = $pos + 1;
  }
  return $count;
}
213
/**
 * Checks to see if a given URI is on a URI blacklist.
 * Currently this means SURBL (http://www.surbl.org) and URIBL (http://www.uribl.com)
 *
 * @param string $uri URI to check.
 * @return string|false Comma-separated names of the configured URIBL zones
 *         that list the URI's domain or IP ("" when none do), or FALSE when
 *         the anti-abuse system or network checks are off, the URI is empty,
 *         or no host can be extracted from it. Callers only need to test the
 *         result for truthiness.
 *
 * Known limitations, kept from the original notes:
 * - does not resolve redirections on the URI
 * - does not do IPv6 lookups
 * - checks a single URI only
 * - does not check HTML entity encoded versions of URIs
 * - does not handle URIBL's inclusion of some third-level domains
 */
function URI_on_URIBL($uri) {

// This code does not yet properly implement a correct and efficient querying
// of URI BL data.

  global $uribl, $validschemes, $validurlpattern, $validipv4pattern,
         $antiabuse, $netchecks, $tltlds;

  // Everything gets a pass if antiabuse or network tests are off.
  if ((! $antiabuse) || (! $netchecks)) return(false);

  // Nothing to look up; say so explicitly instead of falling off the end.
  if (! $uri) return(false);

  // Test for IPv4 address, reverse the quads if found.
  // NOTE(review): the group numbers here and below assume $validschemes
  // contributes exactly one capture group - confirm against the pattern file.
  if (preg_match("/^".$validschemes.$validipv4pattern."/", $uri, $matches)) {
    $domain = $matches[5] . "." . $matches[4] . "." . $matches[3] . "." . $matches[2];
  }
  else {
    // Without a host there is nothing to query; previously an undefined
    // $matches[4] produced lookups for ".<zone>".
    if (! preg_match("/^".$validschemes.$validurlpattern."$/", $uri, $matches) || empty($matches[4])) {
      return(false);
    }
    $domain = $matches[4];

    // Keep the second-level domain, or the third level as well when the name
    // ends in one of the exception suffixes in $tltlds. An empty $tltlds must
    // not count as a match: "/$/" matches every string.
    // FIX: when testing uribl.com lists, also test additional level. First hit wins.
    $levels = (! empty($tltlds) && preg_match("/".$tltlds."$/", $domain)) ? 2 : 1;

    // Reduce to the last ($levels + 1) labels.
    $labels = explode(".", $domain);
    $domain = implode(".", array_slice($labels, -($levels + 1)));
  }

  // Query URI blacklists to see if domain/IP appears as target in known spam
  // or something involved in a malware/phishing attack.
  $listed = array();
  if (is_array($uribl)) {
    foreach ($uribl as $zone) {
      $fqdn = $domain . "." . $zone;
      $answer = gethostbyname($fqdn); // returns the unmodified name on failure
      if (($answer != $fqdn) && preg_match("<^127\.>", $answer)) {
        $listed[] = $zone;
      }
    }
  }
  // Joined after the loop so a hit on a later list only has no leading ", ".
  return(implode(", ", $listed)); // change to return an array of indexes into the URIBL array
}
280
/**
 * Checks to see if a given ID is a Reserved URL.
 *
 * The decimal ID is converted to base 36 and compared with each entry of
 * $ReservedURL, lower-cased.
 *
 * @param int|string $decimal Decimal record ID.
 * @return bool TRUE if the ID is listed as a Reserved URL; FALSE otherwise,
 *              including for an empty or zero ID.
 */
function on_Reserve($decimal) {
  global $ReservedURL;

  if (! $decimal || ! is_array($ReservedURL)) return FALSE;

  $sexatrigesimal = base_convert((string) $decimal, 10, 36);
  foreach ($ReservedURL as $reserved) {
    // Strict comparison: with "==" two numeric-looking strings are compared
    // as numbers, so "1000" would equal a reserved "1e3".
    if ($sexatrigesimal === strtolower($reserved)) return TRUE;
  }
  return FALSE;
}
299
/**
 * Save the given URL in the database if unique and display its TightURL, or
 * display the existing TightURL when the URL is already stored.
 * The ID used in the TightURL is a sexatrigesimal (Base-36) number.
 *
 * The URL is refused, with an error page, when it
 * - points back into this service,
 * - does not match $validschemes . $validurlpattern,
 * - ends in an extension matched by $forbid,
 * - matches the optional $bl, $redir or $ptc patterns,
 * - would not be shorter than the generated TightURL, or
 * - is listed on a URI blacklist (answered with 403).
 * The URL-existence check via Resolve_URL() is not wired in.
 *
 * All results are reported through display_HTML(); nothing is returned.
 *
 * @param string $url Submitted URL, already URL-decoded by the caller.
 */
function save_URL($url) {
  global $dbtable, $svcname, $FOFMethod, $validschemes, $validurlpattern, $self, $redir, $bl,
         $antiabuse, $netchecks, $ptc, $forbid;

  // Not every client sends a User-Agent header.
  $agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : "";
  if ($agent == "Python-urllib/2.5") {
    display_HTML("", "save", "", $url, "http://tighturl.com/haibot");
    exit;
  }

  // HTTP_HOST is supplied by the client: quote it before it becomes part of
  // a regular expression, and escape it before it becomes part of a page.
  $here = preg_quote($_SERVER['HTTP_HOST'] . $self, "|");
  $selfref = "|^(http://" . $here . ")|i";
  $aliasref = "|^(http://www\\." . $here . ")|i";
  $shownurl = htmlspecialchars(strip_tags($url));

  if (preg_match($selfref, $url) || preg_match($aliasref, $url)) {
    display_HTML("", "", "Error: A " . $svcname . " URL cannot point to another URL within " . htmlspecialchars($_SERVER['HTTP_HOST'] . $self) . " .", $url, "", $url);
  }
  elseif (! preg_match("/^".$validschemes.$validurlpattern."$/", $url)) {
    display_HTML("", "", "Error: That URL (".$shownurl.") is not valid.", $url, "", $url);
  }
  elseif (($forbid != "") && (preg_match("!.*".$forbid."$!i", $url))) {
    display_HTML("", "", "Error: Executable URIs are not accepted here due to phishing/malware abuse.", $url, "", $url);
  }
  // The three pattern files are optional. An unset or empty pattern must be
  // skipped: ".*" . "" . ".*" matches every URL and would reject everything.
  elseif (!empty($bl) && preg_match("/.*".$bl.".*/i", $url)) {
    display_HTML("", "", "Error: URL rejected for violating our terms of use.", $url, "", $url);
  }
  elseif (!empty($redir) && preg_match("<.*".$redir.".*>i", $url)) {
    display_HTML("", "", "Error: ".$svcname." is not a URL obfuscation service, and does not accept redirection links.", $url, "", $url);
  }
  elseif (!empty($ptc) && preg_match("/.*".$ptc.".*/i", $url)) {
    display_HTML("", "", "Error: ".$svcname." does not accept PTC (Pay To Click) links due to spamming abuse.", $url, "", $url);
  }
  else {
    $safeurl = sanitize_sql_string($url);
    $remote = sanitize_sql_string($_SERVER['REMOTE_ADDR']);

    $result = mysql_query("SELECT MAX(id) FROM $dbtable");
    if (! $result) {
      // Stop here: there is no result set to read the next ID from.
      display_HTML("", "", "Error: $svcname system error.", $url, "", $url);
      return;
    }

    // Common prefix of every TightURL generated by this installation.
    $prefix = "http://" . $_SERVER['HTTP_HOST'] . $self;
    if (! $FOFMethod) $prefix .= "?i="; // We need the parameter tag

    // Guess the next ID to find out whether shortening is worthwhile at all.
    $lastid = mysql_result($result, 0) + 1;
    $guessurl = $prefix . base_convert($lastid, 10, 36);
    if ( strlen($guessurl) >= strlen($url) ) {
      display_HTML("", "", "Fail: That URL cannot be shortened by $svcname. Sorry!", $url, "", $url);
      return;
    }

    $lists = ($antiabuse && $netchecks) ? URI_on_URIBL($url) : false;
    if ($lists) {
      display_HTML("HTTP/1.0 403 Forbidden", "", "Error: Submitted URL (" . $shownurl . ") is listed in " . $lists . ". You may not create a " . $svcname . " link for it.");
      return;
    }

    // Look for an existing record, with and without a trailing slash.
    $testurl = rtrim($safeurl, "/");
    $rows = 0;
    $res = mysql_query("SELECT * FROM $dbtable WHERE url = '$testurl/';");
    $srows = $res ? (int) mysql_num_rows($res) : 0;
    if ($srows == 0) {
      $res = mysql_query("SELECT * FROM $dbtable WHERE url = '$testurl';");
      $rows = $res ? (int) mysql_num_rows($res) : 0;
    }

    if ($rows == 0 && $srows == 0) {
      // Insert until the generated ID is not a Reserved URL.
      do {
        $req = "INSERT INTO $dbtable (id, url, adddate, addip) ";
        $req .= "VALUES ('', '$safeurl', NOW(), '$remote');";
        if (! mysql_query($req)) {
          die_HTML($svcname, "Error: Database failure.");
        }
        $decimal = mysql_insert_id();
        $reserved_id = on_Reserve($decimal);
        if ($reserved_id) {
          // Delete this record so it doesn't override the reserved ID. (?)
          $req = "DELETE FROM $dbtable WHERE id = '$decimal';";
          mysql_query($req) or die_HTML($svcname, "Error: Database failure.");
        }
      } while ($reserved_id);
    }
    else {
      // Return existing ID for this duplicate request
      $decimal = mysql_result($res, 0, "id");
    }

    // Append the Base-36 ID to the URL
    $address = $prefix . base_convert($decimal, 10, 36);
    display_HTML("", "save", "", $url, $address);
  }
}
404
/**
 * Mark the record behind a TightURL ID as banned for policy violation.
 *
 * Sets status 4 on the record and ends the request with a confirmation page.
 * Does nothing when no record with a non-empty URL exists for the ID.
 *
 * @param string $sexatrigesimal Base-36 ID as received from the request.
 */
function PolicyBan_ID($sexatrigesimal) {
  global $dbtable, $svcname;

  $sexatrigesimal = is_scalar($sexatrigesimal) ? (string) $sexatrigesimal : "";

  // First, convert unsafe user input sexatrigesimal to decimal, which will be safe.
  $decimal = base_convert($sexatrigesimal, 36, 10);

  $req = "SELECT * FROM $dbtable WHERE id = '$decimal';";
  $res = mysql_query($req) or die_HTML($svcname, "Error: Query failed");

  $rows = mysql_num_rows($res);
  if (($rows != 0) && (mysql_result($res, 0, "url") != "")) {
    $req = "update $dbtable set status='4' where id='$decimal';";
    // Only claim success when the update actually ran.
    if (! mysql_query($req)) die_HTML($svcname, "Error: Query failed");
    // The ID is echoed as typed, so it has to be escaped: base_convert()
    // ignores characters that are not base-36 digits rather than rejecting them.
    die_HTML($svcname, "ID: " . htmlspecialchars($sexatrigesimal, ENT_QUOTES) . " banned for policy violation.", "HTTP/1.0 200 OK");
  }
}
424
/**
 * Looks up given ID in the database and redirects, displays template, or
 * displays error page. Expects the ID to be a sexatrigesimal (Base-36) number,
 * which is the format used by TightURLs.
 *
 * We convert the ID to decimal before looking it up in the database, as the
 * ID field is a MySQL autoincrement decimal value.
 *
 * Outcomes:
 * - record found with status 5, 4 or 3: 403 page from the "complaints",
 *   "policy" or "blacklist" template
 * - record found otherwise: hit counter updated, 301 redirect to the URL
 * - no record and not a Reserved URL: 404 error page
 * - Reserved URL: API stub for rest/xmlrpc/soap, else the template of that name
 *
 * @param string $sexatrigesimal Base-36 ID as received from the request.
 */
function lookup_ID($sexatrigesimal) {
  global $dbtable, $svcname;

  $sexatrigesimal = is_scalar($sexatrigesimal) ? (string) $sexatrigesimal : "";

  // First, convert unsafe user input sexatrigesimal to decimal, which will be safe.
  $decimal = base_convert($sexatrigesimal, 36, 10);

  $req = "SELECT * FROM $dbtable WHERE id = '$decimal';";
  $res = mysql_query($req) or die_HTML($svcname, "Error: Query failed");

  $rows = mysql_num_rows($res);
  if (($rows != 0) && (mysql_result($res, 0, "url") != "")) {
    // Change this logic to display a templated page instead?
    switch (mysql_result($res, 0, "status")) {
    case 5:
      display_HTML("HTTP/1.0 403 Forbidden", "complaints");
      return;
    case 4:
      display_HTML("HTTP/1.0 403 Forbidden", "policy");
      return;
    case 3:
      display_HTML("HTTP/1.0 403 Forbidden", "blacklist");
      return;
    default:
      // NOTE(review): stripslashes() also removes backslashes that are part
      // of the stored URL - confirm whether stored data still needs it.
      $url = stripslashes(mysql_result($res, 0, "url"));
      break;
    }
    // Hit accounting is best effort; the redirect happens regardless.
    $req = "update $dbtable set lasthit=NOW(), hits=hits+1 where id='$decimal';";
    mysql_query($req);
    header("HTTP/1.0 301 Moved Permanently");
    header("Location: $url");
  }
  elseif (! on_Reserve($decimal)) { // Not found, Not on reserve
    display_HTML("HTTP/1.0 404 Not Found", "", "Error: That " . $svcname . " ID is not in our database.");
  }
  else { // It's a(n implied) Reserved URL
    // Is this a template or an API?
    // Use the canonical base-36 form rather than the text the client sent:
    // base_convert() ignores characters that are not base-36 digits, so the
    // raw value can carry extra punctuation that must never reach a template
    // file name.
    $reserved = base_convert($decimal, 10, 36);
    switch ($reserved) {
    case "rest":
      api_REST();
      break;
    case "xmlrpc":
      api_XMLRPC();
      break;
    case "soap":
      api_SOAP();
      break;
    default:
      display_HTML("", $reserved);
    }
  }
}
488
/**
 * Placeholder for the REST API: answers 501 Not Implemented via die_HTML().
 */
function api_REST() {
  global $svcname; // needed for the page title; it was undefined in this scope

  die_HTML($svcname, "Error: REST API not implemented yet.", "HTTP/1.0 501 Not Implemented");
}
492
/**
 * Placeholder for the XML-RPC API: answers 501 Not Implemented via die_HTML().
 */
function api_XMLRPC() {
  global $svcname; // needed for the page title; it was undefined in this scope

  die_HTML($svcname, "Error: XML-RPC API not implemented yet.", "HTTP/1.0 501 Not Implemented");
}
496
/**
 * Placeholder for the SOAP API: answers 501 Not Implemented via die_HTML().
 */
function api_SOAP() {
  global $svcname; // needed for the page title; it was undefined in this scope

  die_HTML($svcname, "Error: SOAP API not implemented yet.", "HTTP/1.0 501 Not Implemented");
}
500
/**
 * Display HTML page using template and template variables.
 *
 * Reads the main system template file (tighturl.tmpl) into $html, fills in
 * the template variables, sends the status line and echoes the page. Ends
 * the request through die_HTML() when a template file is missing.
 *
 * $code
 * HTTP status line to send, e.g. "HTTP/1.0 404 Not Found". An empty string
 * means "HTTP/1.0 200 OK".
 *
 * $template
 * Name of a subtemplate. When not empty, tighturl.$template.tmpl must exist;
 * its contents replace template variable $HTML in the main template.
 *
 * $content
 * HTML that replaces any $HTML left in the main or inner template. It is
 * inserted as-is, so callers must escape untrusted text they put into it.
 * Content starting with "Error:" is shown big and red.
 *
 * $url
 * URL submitted to TightURL
 *
 * $tighturl
 * TightURL generated for $url
 *
 * $input
 * When submitted URL does not validate it is passed back as $input
 *
 * $url, $tighturl, $input and the host name are HTML-escaped here.
 *
 * Template variables are words in all capital letters that start with a
 * $ symbol, such as $TEMPLATEVARIABLE. At runtime, these template variables
 * are replaced by program variables, in the order listed below.
 * - $HTML : Inner template, then $content
 * - $PARM : Parameter tag when not using 404-Method
 * - $URLLEN : Length of the submitted URL
 * - $URL : URL submitted to TightURL
 * - $INPUT : Bad input URL being passed back to output form
 * - $TIGHTURLVER : TightURL version
 * - $TIGHTURLLEN : Length of generated TightURL
 * - $TIGHTURL : TightURL generated for the submitted URL
 * - $DIFF : Difference in length between submitted and TightURLs
 * - $SVCNAME : Name of the TightURL service
 * - $HEADCOLOR : Color of the H1 Header tag
 * - $TABLECOLOR : Color of the table containing URL input field
 * - $TAGLINE : Tagline of the TightURL service
 * - $CPASTTEXT : Past-tense word for tightening URLs, as configured
 * - $PASTTEXT : Lower-cased past-tense word for tightening URLs
 * - $VERBTEXT : Lower-cased action word for tightening URLs
 * - $CVERBTEXT : Action word for tightening URLs, as configured
 * - $COPYRIGHTHOLDER : Name of copyright holder
 * - $COPYRIGHT : Copyright duration string
 * - $URIBLS : HTML string of URIBLs TightURL is checking
 * - $HOST : Hostname TightURL is running on
 * - $SELF : Name TightURL is invoked as
 * - $__ : A literal "$", to show a template variable name as text
 * - $BBSTATS : Bad Behavior statistics, or nothing when Bad Behavior is off
 */
function display_HTML ($code, $template, $content="", $url="", $tighturl="", $input="") {
  global $svcname, $verbtext, $pasttext, $tagline, $uribls, $parm,
       $headcolor, $tablecolor, $copyright, $copyrightholder, $self, $BB2;

  // Comparison, not assignment: "$code = ''" wiped every status line that
  // callers passed in (403, 404, ...).
  if ($code == "") $code = "HTTP/1.0 200 OK";
  $template = rtrim($template, "/");

  if (! file_exists("tighturl.tmpl")) {
    die_HTML($svcname, "Error: <big><font color='red'>Error: TightURL Redirection service (" . $svcname . ") site template not found.</font></big>");
  }
  $html = file_get_contents("tighturl.tmpl");

  if ($template != "") {
    $subfile = "tighturl." . $template . ".tmpl";
    if (! file_exists($subfile)) {
      // The name may come from the request, so escape it in the message.
      die_HTML($svcname, "Error: Template file tighturl." . htmlspecialchars($template, ENT_QUOTES) . ".tmpl cannot be found.");
    }
    $html = str_replace("\$HTML", file_get_contents($subfile), $html);
  }

  // Highlight the leading "Error:" only; the rest of the message may itself
  // contain that text.
  if (substr($content, 0, 6) == "Error:") {
    $content = "<big><font color='red'>Error:" . substr($content, 6) . "</font></big>";
  }
  if ($content) $content .= "<br />\n";

  $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : "";

  // Always replace longer similar tokens before shorter ones. Things won't work the
  // way you expect if you replace $URL first, and then replace $URLMORELETTERS.
  // str_replace() processes the pairs in array order and inserts each value
  // literally; preg_replace() treated "$1" or "\1" inside a URL as a
  // backreference and dropped it.
  $tokens = array(
    "\$HTML"            => $content,
    "\$PARM"            => (string) $parm,
    "\$URLLEN"          => (string) strlen($url),
    "\$URL"             => htmlspecialchars(strip_tags($url), ENT_QUOTES),
    "\$INPUT"           => htmlspecialchars($input, ENT_QUOTES),
    "\$TIGHTURLVER"     => VERSION,
    "\$TIGHTURLLEN"     => (string) strlen($tighturl),
    "\$TIGHTURL"        => htmlspecialchars($tighturl, ENT_QUOTES),
    "\$DIFF"            => (string) (strlen($url) - strlen($tighturl)),
    "\$SVCNAME"         => (string) $svcname,
    "\$HEADCOLOR"       => (string) $headcolor,
    "\$TABLECOLOR"      => (string) $tablecolor,
    "\$TAGLINE"         => (string) $tagline,
    "\$CPASTTEXT"       => (string) $pasttext,
    "\$PASTTEXT"        => strtolower((string) $pasttext),
    "\$VERBTEXT"        => strtolower((string) $verbtext),
    "\$CVERBTEXT"       => (string) $verbtext,
    "\$COPYRIGHTHOLDER" => (string) $copyrightholder,
    "\$COPYRIGHT"       => (string) $copyright,
    "\$URIBLS"          => (string) $uribls,
    "\$HOST"            => htmlspecialchars($host, ENT_QUOTES),
    "\$SELF"            => (string) $self,
    "\$__"              => "$", // Template Variables shown as text instead of substituted
  );
  $html = str_replace(array_keys($tokens), array_values($tokens), $html);

  // The title must not contain markup picked up from substituted values.
  if (preg_match("|<title>(.*)</title>|is", $html, $matches)) {
    $html = str_replace($matches[0], "<title>" . strip_tags($matches[1]) . "</title>", $html);
  }

  if ($BB2) {
    $html = str_replace("\$BBSTATS", bb2_insert_stats(), $html);
    // Bad Behavior's head code goes first inside <head>.
    $bb2code = bb2_insert_head();
    if (preg_match("|<head>(.*)</head>|is", $html, $matches)) {
      $html = str_replace($matches[0], "<head>\n" . $bb2code . $matches[1] . "</head>", $html);
    }
  }
  else {
    $html = str_replace("\$BBSTATS", "", $html);
  }

  header($code);
  echo $html;
}
620
/**
 * Die in an HTML-friendly way, without the benefit of a template.
 * Use display_HTML to "die" using the TightURL site template.
 *
 * Sends $code as the status line, prints a minimal page with $svcname as the
 * title and $errmsg as the body, then terminates the script. Both values are
 * printed as given.
 */
function die_HTML($svcname, $errmsg, $code="HTTP/1.0 500 Internal Server Error") {
  header($code);

  $page = "<html>\n <head>\n <title>" . $svcname . "</title>\n </head>\n <body>\n"
        . " " . $errmsg . "<br />\n"
        . " </body>\n</html>";
  echo $page;

  exit;
}
633
/**
 * Verifies the existence and accessibility of a resource in a given URL.
 *
 * Returns FALSE if the resource does not exist (no host in the URL, connection
 * failure, 404/408/410, or a redirect that cannot be followed), else returns
 * the resolved and verified URL. Given URL is returned as resolved to itself
 * if $netchecks are off.
 *
 * Will recurse through redirection chains up to 12 times by default. This
 * value is preferably selectable by the user in a configuration screen
 * somewhere, but probably should not be lower than 12.
 *
 * Unfortunately Google's GFE server erroneously returns 404 errors when
 * they should be returning something like a 405, making it impossible to
 * use HTTP HEAD to verify the existence of resources front-ended by GFE.
 * Additionally Amazon throws a 405 attempting to HEAD some of their resources
 * so this function does not attempt to use HEAD at all.
 *
 * BUG: Presently only does HTTP, and only HTTP-level redirects (the response
 * body is not inspected).
 *
 * @param string $url           URL to verify.
 * @param array  $resolvedchain Receives every URL visited; initialised to
 *                              array("") when the caller passes an unset variable.
 * @param int    $chainlen      Remaining number of redirects to follow.
 * @return string|false Final URL, or FALSE when it could not be verified.
 */
function Resolve_URL ($url, &$resolvedchain, $chainlen=12) {
  global $netchecks;

  if (! isset($resolvedchain)) $resolvedchain=array("");

  if (! $netchecks) {
    // If network checks off, accept submitted URL as resolved.
    return($url);
  }

  $parsed = parse_url($url);
  // Without a host there is nothing to connect to.
  if (! is_array($parsed) || ! isset($parsed['host']) || $parsed['host'] == "") return(false);

  // Origin of this URL, used to complete host-relative redirects.
  $origin = (isset($parsed['scheme']) ? $parsed['scheme'] : "http") . "://" . $parsed['host'];
  if (isset($parsed['port']) && $parsed['port'] != 0) $origin .= ":" . $parsed['port'];

  // Request target: path plus query. The fragment is never sent to a server.
  if (isset($parsed['path']))
    $post = (substr($parsed['path'], 0, 1) == '/') ? $parsed['path'] : ('/'.$parsed['path']);
  else
    $post = "/";
  $post .= isset($parsed['query']) ? '?'.$parsed['query'] : '';

  $resolved = false;

  // Change this to support all protocols TightURL supports, not just HTTP
  if (! isset($parsed['port']) || $parsed['port'] == 0) $parsed['port'] = 80;
  $ip = gethostbyname($parsed['host']); // This is supposed to avoid unnecessary DNS lookups
  if ($connection = @fsockopen ($ip, $parsed['port'], $errno, $errstr, 5)) {
    stream_set_timeout($connection, 5);
    // HTTP send Connection: Close so we don't have to wait
    // Google's GFE handling of HEAD is broken, and Amazon returns 405 on HEAD so had to use GET
    fwrite($connection, "GET ".$post." HTTP/1.0\r\nHost: ".$parsed['host']."\r\nConnection: Close\r\n\r\n");
    while (!feof($connection)) {
      $line_read = fgets($connection);
      if ($line_read === false) break;
      // fgets() keeps the line ending, so the blank line that ends the
      // headers arrives as "\r\n", never as "".
      $header = rtrim($line_read, "\r\n");
      if ($header === "") break; //blank line is header delimiter, if you see it you're done here
                                 //Fix: change this and start parsing the body for HTML-based redirections.

      // Status line; the reason phrase after the code is optional.
      if (preg_match("/^HTTP\/\S* +(\d{3})/", $header, $matches)) {
        switch ((int) $matches[1]) {
        case 300:
        case 301:
        case 302:
        case 303:
        case 307:
        case 308:
          // Redirect: keep reading headers until the Location line.
          break;
        case 404: // Not found
        case 408: // Request timeout (this URL will never work again)
        case 410: // Gone (and not coming back)
          break 2;
        default:
          // Every other status counts as a final destination: 2xx, 304, and
          // errors such as 401/403/405 or 5xx still prove that something
          // answers at this URL.
          $resolved = $url;
          $resolvedchain[] = $url;
          break 2;
        }
      }

      // If this is a redirect, follow it if the chain isn't too long.
      // Anchored so that "Content-Location:" is not mistaken for it.
      if (preg_match("/^Location:\s*(\S.*)$/i", $header, $matches)) {
        fclose($connection);
        $connection = false;
        $resolvedchain[] = $url;
        $target = trim($matches[1]);
        // Complete a host-relative redirect ("/path") with this URL's origin.
        if (substr($target, 0, 1) == "/" && substr($target, 0, 2) != "//") $target = $origin . $target;
        if ($chainlen > 0 ) $resolved = Resolve_URL($target, $resolvedchain, $chainlen - 1);
        break;
      }

    }
    // Parse body here?
    if ($connection) fclose($connection);
  }
  return($resolved);
}
757?>
758

Archive Download this file

Branches