import java.lang.*;
import java.util.*;
import java.io.*;
import java.net.*;

/**
 * Command-line tool that submits a query to Yahoo, Google or Microsoft
 * (Live) search, downloads the HTML result page and prints the host name of
 * every result it can locate in that page.
 *
 * <p>Usage: {@code java TEST {-y/-g/-m} [-ss/-p/-d] query terms...}
 * <ul>
 *   <li>{@code -y} / {@code -g} / {@code -m} select Yahoo, Google or
 *       Microsoft.</li>
 *   <li>{@code -ss} extracts sponsored results instead of web results.</li>
 *   <li>{@code -p} only fetches the page and reports elapsed time and byte
 *       count on standard error.</li>
 *   <li>{@code -d} dumps the raw HTML that was returned.</li>
 * </ul>
 *
 * <p>The extractors are plain string scanners keyed on markers that are
 * hard-coded below; they print nothing when the markers are absent.
 */
public class TEST {

    private static final String YAHOO_SEARCH = "http://search.yahoo.com/search?p=";
    private static final String GOOGLE_SEARCH = "http://www.google.com/search?q=";
    private static final String MSOFT_SEARCH = "http://search.live.com/results.aspx?q=";
    private static final String USER_AGENT =
            "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT)";

    /** Charset used to encode the query and to decode the downloaded page. */
    private static final String PAGE_CHARSET = "UTF-8";

    /** Size of the buffer used while downloading the result page. */
    private static final int READ_BUFFER_SIZE = 1024 * 50;

    /** Number of embedded HTML tags formatUrl removes before it stops. */
    private static final int MAX_SKIPPED_TAGS = 2;

    /**
     * Parses the command line and runs the search.
     *
     * <p>{@code args[0]} must be the engine switch. The mode switch is only
     * recognised in {@code args[1]} when at least three arguments are given,
     * so that a query is always left over. All remaining arguments are joined
     * with single blanks to form the query.
     *
     * @param args command-line arguments as described in the class comment
     */
    static public void main(String args[]) {
        long start = System.currentTimeMillis();
        int argc = args.length;

        if (argc < 1) {
            printUsage();
            return;
        }

        // decide whose search engine we are hitting
        boolean yahoo = "-y".equals(args[0]);
        boolean google = "-g".equals(args[0]);
        boolean msoft = "-m".equals(args[0]);
        if (!(yahoo || google || msoft)) {
            printUsage();
            return;
        }
        int qstart = 1;

        // decide sponsored search, performance only, debug, or default search
        boolean ss = false;
        boolean p = false;
        boolean d = false;
        if (argc > 2) {
            ss = "-ss".equals(args[1]);
            p = "-p".equals(args[1]);
            d = "-d".equals(args[1]);
            if (ss || p || d) {
                qstart++;
            }
        }

        // join the query terms with single blanks (no trailing blank)
        StringBuilder query = new StringBuilder();
        for (int i = qstart; i < argc; i++) {
            if (query.length() > 0) {
                query.append(' ');
            }
            query.append(args[i]);
        }
        execute(query.toString(), yahoo, google, msoft, ss, p, d, start);
    }

    /**
     * Prints the usage line to standard output and terminates the JVM with
     * exit status -1.
     *
     * <ul>
     *   <li>-y --&gt; Yahoo</li>
     *   <li>-g --&gt; Google</li>
     *   <li>-m --&gt; Microsoft</li>
     *   <li>default is raw search (not Sponsored Search)</li>
     *   <li>-ss --&gt; Sponsored Search (optional)</li>
     *   <li>-p --&gt; performance only (no parse, just get the raw HTML)</li>
     *   <li>-d --&gt; debugging, print out the unmodified HTML returned</li>
     * </ul>
     */
    static void printUsage() {
        System.out.println("Usage: java TEST {-y/-g/-m} [-ss/-p/-d] query terms...");
        System.exit(-1);
    }

    /**
     * Fetches the result page for {@code query} and processes it according
     * to the mode flags.
     *
     * <p>The engine flags are checked in the order yahoo, google, msoft; when
     * none is set the Yahoo URL is fetched, but the result modes then exit
     * with status -1 because no extractor is selected. The mode flags are
     * checked in the order {@code p}, {@code d}, {@code ss}; {@code p} and
     * {@code d} terminate the JVM with status 0 after printing.
     *
     * <p>A failure while reading the response body is printed to standard
     * output and whatever was read so far is still processed. Any other
     * failure is reported on standard error instead of being discarded.
     *
     * @param query  search terms; URL-encoded as UTF-8 before sending
     * @param yahoo  query Yahoo
     * @param google query Google
     * @param msoft  query Microsoft
     * @param ss     extract sponsored results instead of web results
     * @param p      performance mode: report elapsed milliseconds and bytes
     * @param d      debug mode: print the downloaded HTML unchanged
     * @param start  {@code System.currentTimeMillis()} value that the
     *               performance report is measured from
     */
    static public void execute(String query, boolean yahoo,
                               boolean google, boolean msoft,
                               boolean ss, boolean p, boolean d,
                               long start) {
        final String search;
        if (yahoo) {
            search = YAHOO_SEARCH;
        } else if (google) {
            search = GOOGLE_SEARCH;
        } else if (msoft) {
            search = MSOFT_SEARCH;
        } else {
            search = YAHOO_SEARCH;
        }

        try {
            URL url = new URL(search + URLEncoder.encode(query, PAGE_CHARSET));
            URLConnection h = url.openConnection();
            h.setRequestProperty("User-Agent", USER_AGENT);
            h.connect();

            // Collect raw bytes and decode once at the end, so a multi-byte
            // character split across two reads is not corrupted.
            ByteArrayOutputStream body = new ByteArrayOutputStream();
            InputStream inp = null;
            try {
                inp = h.getInputStream();
                byte[] buffer = new byte[READ_BUFFER_SIZE];
                int length;
                while ((length = inp.read(buffer)) != -1) {
                    body.write(buffer, 0, length);
                }
            } catch (IOException e) {
                // best effort: keep going with whatever was downloaded
                System.out.println(e);
            } finally {
                if (inp != null) {
                    try {
                        inp.close();
                    } catch (IOException ignored) {
                        // nothing useful can be done if close fails
                    }
                }
            }

            if (p) {
                // Performance only; don't parse the results
                System.err.println("Performance/MSECS: " +
                                   (System.currentTimeMillis() - start) +
                                   "  Bytes: " + body.size());
                System.exit(0);
            }

            String page = body.toString(PAGE_CHARSET);
            if (d) {
                // Debugging only; unadulterated HTML
                System.out.println("UNADULTERATED: " + page);
                System.exit(0);
            } else if (ss) {
                // going for the sponsored search results
                System.out.println("Sponsored Search results for " + query);
                if (yahoo) {
                    extractURLSyahooss(page);
                } else if (google) {
                    extractURLSgoogless(page);
                } else if (msoft) {
                    extractURLSmsoftss(page);
                } else {
                    System.exit(-1);
                }
            } else {
                // default is normal web search
                System.out.println("Search results for " + query);
                if (yahoo) {
                    extractURLSyahoo(page);
                } else if (google) {
                    extractURLSgoogle(page);
                } else if (msoft) {
                    extractURLSmsoft(page);
                } else {
                    System.exit(-1);
                }
            }
        } catch (Exception e) {
            // top-level boundary of the tool: report instead of hiding it
            System.err.println("Search failed: " + e);
        }
    }

    /**
     * Prints the formatted URL of every Yahoo web result in {@code results}.
     *
     * <p>A result is the text between a {@code <span class=url>} tag and the
     * next blank. Scanning stops at the first tag that has no blank after it.
     *
     * @param results HTML of the result page
     */
    static void extractURLSyahoo(String results) {
        final String tag = "<span class=url>";
        final char endChar = ' ';
        // lowercase copy for matching; indices stay valid for 'results'
        String lower = toAsciiLowerCase(results);

        int tagStart = lower.indexOf(tag);
        while (tagStart != -1) {
            int urlStart = tagStart + tag.length();
            int urlEnd = lower.indexOf(endChar, urlStart);
            if (urlEnd == -1) {
                break; // unterminated entry: nothing more can be extracted
            }
            System.out.println(formatUrl(results.substring(urlStart, urlEnd)));
            // now look for the next search result
            tagStart = lower.indexOf(tag, urlEnd);
        }
    }

    /**
     * Prints the formatted URL of every Yahoo sponsored result in
     * {@code results}.
     *
     * <p>Each occurrence of the string {@code overture} marks a sponsored
     * entry; the URL is the text of the first {@code <em>...</em>} element
     * after it. Scanning stops at the first entry that lacks either
     * delimiter.
     *
     * @param results HTML of the result page
     */
    static void extractURLSyahooss(String results) {
        final String marker = "overture";
        final String open = "<em>";
        final String close = "</em>";
        String lower = toAsciiLowerCase(results);

        int start = lower.indexOf(marker);
        while (start != -1) {
            // continue past the overture string
            int openAt = lower.indexOf(open, start + marker.length());
            if (openAt == -1) {
                break;
            }
            int urlStart = openAt + open.length();
            // the end delimiter must follow the start delimiter
            int urlEnd = lower.indexOf(close, urlStart);
            if (urlEnd == -1) {
                break;
            }
            System.out.println(formatUrl(results.substring(urlStart, urlEnd)));
            // now look for the next search result
            start = lower.indexOf(marker, urlEnd);
        }
    }

    /**
     * Prints the formatted URL of every Google web result in {@code results}.
     *
     * <p>Every {@code href=} on the page is examined. Blanks after the
     * {@code =} are skipped; a value that starts with {@code "}, {@code '} or
     * a back-quote ends at the matching quote, otherwise it ends at the next
     * {@code >}. Values rejected by {@link #isExternalResult(String)} are
     * not printed. Scanning stops at the first value with no terminator.
     *
     * @param results HTML of the result page
     */
    static void extractURLSgoogle(String results) {
        final String hrefTag = "href=";
        String lower = toAsciiLowerCase(results);
        int pageLength = lower.length();

        int hrefStart = lower.indexOf(hrefTag);
        while (hrefStart != -1) {
            // skip over blanks after the HREFTAG
            int i = hrefStart + hrefTag.length();
            while (i < pageLength && lower.charAt(i) == ' ') {
                i++;
            }
            if (i >= pageLength) {
                break;
            }

            // default terminator, replaced by the quote when there is one
            char endHrefChar = '>';
            char opener = lower.charAt(i);
            if (opener == '"' || opener == '\'' || opener == '`') {
                endHrefChar = opener;
                i++; // skip over the quote
            }

            int urlStart = i;
            int urlEnd = lower.indexOf(endHrefChar, urlStart);
            if (urlEnd == -1) {
                break;
            }

            String href = results.substring(urlStart, urlEnd);
            if (isExternalResult(href)) {
                System.out.println(formatUrl(href));
            }
            // now look for the next search result
            hrefStart = lower.indexOf(hrefTag, urlEnd);
        }
    }

    /**
     * Decides whether an href value taken from a Google page is a search
     * result rather than a link belonging to the page itself.
     *
     * <p>Rejected are: empty values, values starting with {@code /} or
     * {@code #} (site-relative), values starting with {@code a} (the page's
     * {@code a.href.replace} script fragments match {@code href=} too),
     * values mentioning {@code google.com} or {@code youtube.com}, and values
     * whose host part after {@code ://} begins with a digit (IP addresses).
     */
    private static boolean isExternalResult(String href) {
        if (href.length() == 0) {
            return false;
        }
        char first = href.charAt(0);
        if (first == '/' || first == '#' || first == 'a') {
            return false;
        }
        String lowerHref = toAsciiLowerCase(href);
        if (lowerHref.indexOf("google.com") != -1
                || lowerHref.indexOf("youtube.com") != -1) {
            return false;
        }
        int scheme = href.indexOf("://");
        if (scheme != -1) {
            int hostAt = scheme + 3;
            if (hostAt < href.length()) {
                char hostFirst = href.charAt(hostAt);
                if (hostFirst >= '0' && hostFirst <= '9') {
                    return false;
                }
            }
        }
        return true;
    }

    /**
     * Prints the formatted URL of every Google sponsored result in
     * {@code results}.
     *
     * <p>Each occurrence of {@code id=an} marks a sponsored entry; the URL is
     * the text of the first {@code <cite>...</cite>} element after it.
     * Scanning stops at the first entry that lacks either delimiter.
     *
     * @param results HTML of the result page
     */
    static void extractURLSgoogless(String results) {
        final String marker = "id=an";
        final String open = "<cite>";
        final String close = "</cite>";
        String lower = toAsciiLowerCase(results);

        int entry = lower.indexOf(marker);
        while (entry != -1) {
            // search for actual result
            int openAt = lower.indexOf(open, entry);
            if (openAt == -1) {
                break;
            }
            int urlStart = openAt + open.length();
            int urlEnd = lower.indexOf(close, urlStart);
            if (urlEnd == -1) {
                break;
            }
            System.out.println(formatUrl(results.substring(urlStart, urlEnd)));
            // now look for the next search result
            entry = lower.indexOf(marker, urlEnd);
        }
    }

    /**
     * Prints the formatted URL of every Microsoft web result in
     * {@code results}.
     *
     * <p>Only {@code href="} attributes located between the
     * {@code div id="results"} marker and the {@code div class="sb_pag"}
     * marker are considered; values containing {@code msnscache} or
     * {@code r.msn.com} are skipped. When the start marker is missing the
     * scan begins at the top of the page; when the end marker is missing
     * nothing is printed.
     *
     * @param results HTML of the result page
     */
    static void extractURLSmsoft(String results) {
        final String startMarker = "div id=\"results\"";
        final String endMarker = "div class=\"sb_pag\"";
        final String skip1 = "msnscache";
        final String skip2 = "r.msn.com";
        final String hrefTag = "href=\"";
        final char endHrefChar = '"';
        String lower = toAsciiLowerCase(results);

        int searchStart = lower.indexOf(startMarker);
        int searchEnd = lower.indexOf(endMarker);
        if (searchEnd == -1) {
            return; // no end marker: every href counts as past the end
        }

        // find the first href on the page (after the start marker)
        int hrefStart = lower.indexOf(hrefTag, Math.max(searchStart, 0));

        // plow through all search results up to the end marker
        while (hrefStart != -1 && hrefStart < searchEnd) {
            int urlStart = hrefStart + hrefTag.length();
            int urlEnd = lower.indexOf(endHrefChar, urlStart);
            if (urlEnd == -1) {
                break;
            }
            // determine whether it is a search result or an msn link
            String href = results.substring(urlStart, urlEnd);
            if (href.indexOf(skip1) == -1 && href.indexOf(skip2) == -1) {
                System.out.println(formatUrl(href));
            }
            // now look for the next search result
            hrefStart = lower.indexOf(hrefTag, urlEnd);
        }
    }

    /**
     * Prints the formatted URL of every Microsoft sponsored result in
     * {@code results}.
     *
     * <p>Sponsored results can sit in several sections (top, right rail,
     * bottom), so the approach is inverted: every {@code <cite>...</cite>}
     * element lying entirely before the {@code div id="results"} marker or
     * after the {@code div class="sb_pag"} marker is treated as sponsored.
     * Entries containing {@code Encarta Encyclopedia} or {@code r.msn.com}
     * are skipped. Nothing is printed when either marker is missing, and
     * scanning stops at the first unterminated {@code <cite>}.
     *
     * @param results HTML of the result page
     */
    static void extractURLSmsoftss(String results) {
        final String startMarker = "div id=\"results\"";
        final String endMarker = "div class=\"sb_pag\"";
        final String open = "<cite>";
        final String close = "</cite>";
        final String skip1 = "Encarta Encyclopedia";
        final String skip2 = "r.msn.com";
        String lower = toAsciiLowerCase(results);

        // find start, end of real results
        int searchStart = lower.indexOf(startMarker);
        int searchEnd = lower.indexOf(endMarker);
        if (searchStart == -1 || searchEnd == -1) {
            return;
        }

        int openAt = lower.indexOf(open);
        while (openAt != -1) {
            int urlStart = openAt + open.length();
            int urlEnd = lower.indexOf(close, urlStart);
            if (urlEnd == -1) {
                break;
            }

            // urlStart <= urlEnd, so one comparison per side is enough
            boolean beforeResults = urlEnd < searchStart;
            boolean afterResults = urlStart > searchEnd;
            if (beforeResults || afterResults) {
                // extract and print SS result (not a Web result)
                String href = results.substring(urlStart, urlEnd);
                if (href.indexOf(skip1) == -1 && href.indexOf(skip2) == -1) {
                    System.out.println(formatUrl(href));
                }
            }

            // now look for the next sponsored search result
            openAt = lower.indexOf(open, urlEnd);
        }
    }

    /**
     * Reduces a raw URL fragment taken from a result page to its host part.
     *
     * <p>Processing steps:
     * <ol>
     *   <li>Leading {@code http://}, {@code https://} and {@code " - "}
     *       pre-strings are removed (repeatedly, in any order).</li>
     *   <li>Characters are copied until the first {@code /} or the end of
     *       the text.</li>
     *   <li>The first two embedded HTML tags (for example {@code <b>},
     *       {@code </b>}, {@code <wbr/>}) are dropped; a third tag, or a tag
     *       with no closing {@code >}, ends the result.</li>
     * </ol>
     *
     * @param ytag raw text extracted from the page; must not be null
     * @return the cleaned host text, exactly as long as the characters kept
     */
    static public String formatUrl(String ytag) {
        final String[] prestrings = { "http://", "https://", " - " };
        int length = ytag.length();
        int i = 0;

        // first skip pre-strings for as long as one is found at position i
        boolean skipped = true;
        while (skipped) {
            skipped = false;
            for (int k = 0; k < prestrings.length; k++) {
                String pre = prestrings[k];
                if (ytag.regionMatches(true, i, pre, 0, pre.length())) {
                    i += pre.length();
                    skipped = true;
                }
            }
        }

        // next remove embedded HTML that bolds the result, end at trailing char
        StringBuilder host = new StringBuilder(length);
        int tagsSkipped = 0;
        while (i < length) {
            char c = ytag.charAt(i);
            if (c == '/') {
                break; // start of the path
            }
            if (c == '<') {
                int close = ytag.indexOf('>', i);
                if (tagsSkipped == MAX_SKIPPED_TAGS || close == -1) {
                    break; // trailing markup
                }
                tagsSkipped++;
                i = close + 1;
            } else {
                host.append(c);
                i++;
            }
        }
        return host.toString();
    }

    /**
     * Returns a copy of {@code text} with only the letters A-Z lowercased.
     *
     * <p>Unlike {@code String.toLowerCase()}, the result always has the same
     * length as the input and does not depend on the default locale, so an
     * index found in the copy is valid in the original text.
     */
    private static String toAsciiLowerCase(String text) {
        char[] chars = text.toCharArray();
        for (int k = 0; k < chars.length; k++) {
            char c = chars[k];
            if (c >= 'A' && c <= 'Z') {
                chars[k] = (char) (c + ('a' - 'A'));
            }
        }
        return new String(chars);
    }
} /* end class TEST */