This is the new home of the egghelp.org community forum.
All data has been migrated (including user logins/passwords) to a new phpBB version.


For more information, see this announcement post. Click the X in the top right-corner of this box to dismiss this message.

Wiki Script Slow To Output To Channel

Support & discussion of released scripts, and announcements of new releases.
Post Reply
User avatar
Dominatez
Halfop
Posts: 50
Joined: Mon Jan 14, 2019 5:08 pm
Location: United Kingdom

Wiki Script Slow To Output To Channel

Post by Dominatez »

Hi Guys,

Wiki script is taking 2 - 3 minutes to output to the screen after i input anything, and i am really at a loss as to why it is.

Any help would be greatly appreciated.

Code: Select all


# Requires Tcl 8.5+ and tcllib
# To enable you must .chanset #channel +wiki


package require http
package require htmlparse
package require tls
::http::register https 443 ::tls::socket


namespace eval wiki {
	variable max_lines 1
	variable max_chars 400
	variable output_cmd "putserv"
	variable url "https://en.wikipedia.org/wiki/"

	bind pub -|- "!w" wiki::search
	bind pub -|- "!wiki" wiki::search

#	variable parse_regexp {(<table class.*?<p>.*?</p>.*?</table>)??.*?<p>(.*?)</p>\n<table id="toc"}
	variable parse_regexp {(?:</table>)?.*?<p>(.*)((</ul>)|(</p>)).*?((<table id="toc")|(<h2>)|(<table id="disambigbox"))}

	setudef flag wiki
}


proc wiki::fetch {term {url {}}} {
	if {$url != ""} {
		set token [http::geturl $url -timeout 10000]
	} else {
		set query [http::formatQuery [regsub -all -- {\s} $term "_"]]
		set token [http::geturl ${wiki::url}${query} -timeout 10000]
	}
	set data [http::data $token]
	set ncode [http::ncode $token]
	set meta [http::meta $token]
	upvar #0 $token state
	set fetched_url $state(url)
	http::cleanup $token

	# debug
	putlog "Fetch! term: $term url: $url fetched: $fetched_url"
	set fid [open "w-debug.txt" w]
	puts $fid $data
	close $fid

	# Follow redirects
	if {[regexp -- {^3\d{2}$} $ncode]} {
		return [wiki::fetch $term [dict get $meta Location]]
	}

	if {$ncode != 200} {
		error "HTTP query failed ($ncode): $data: $meta"
	}

	# If page returns list of results, choose the first one and fetch that
	#if {[regexp -- {<p>.*?((may refer to:)|(in one of the following senses:))</p>} $data]} {
	#	regexp -- {<ul>.*?<li>.*? title="(.*?)">.*?</li>} $data -> new_query
	#	return [wiki::fetch $new_query]
	#}

	if {![regexp -- $wiki::parse_regexp $data -> out]} {
		error "Parse error"
	}

	return [list url $fetched_url result [wiki::sanitise $out]]
}

proc wiki::sanitise {raw} {
	set raw [::htmlparse::mapEscapes $raw]
	# Remove some help links
	set raw [regsub -- {<small class="metadata">.*?</small>} $raw ""]

	set raw [regsub -all -- {<(.*?)>} $raw ""]
	set raw [regsub -all -- {\[.*?\]} $raw ""]
	set raw [regsub -all -- {\n} $raw " "]
	return $raw
}

proc wiki::search {nick uhost hand chan argv} {
	if {![channel get $chan wiki]} { return }
	if {[string length $argv] == 0} {
		$wiki::output_cmd "PRIVMSG $chan :Please provide a term."
		return
	}

	set argv [string trim $argv]
	# Upper case first character
	set argv [string toupper [string index $argv 0]][string range $argv 1 end]

	if {[catch {wiki::fetch $argv} data]} {
		$wiki::output_cmd "PRIVMSG $chan :Error: $data"
		return
	}

	foreach line [wiki::split_line $wiki::max_chars [dict get $data result]] {
		if {[incr count] > $wiki::max_lines} {
			$wiki::output_cmd "PRIVMSG $chan :Output truncated. [dict get $data url]"
			break
		}
		$wiki::output_cmd "PRIVMSG $chan :$line"
	}
}

# by fedex
proc wiki::split_line {max str} {
	set last [expr {[string length $str] -1}]
	set start 0
	set end [expr {$max -1}]

	set lines []

	while {$start <= $last} {
		if {$last >= $end} {
			set end [string last { } $str $end]
		}

		lappend lines [string trim [string range $str $start $end]]
		set start $end
		set end [expr {$start + $max}]
	}

	return $lines
}

putlog "wiki.tcl loaded"
M
MMX
Voice
Posts: 4
Joined: Sun Mar 20, 2022 5:42 pm

Post by MMX »

There are some issues.

1. Wrong and unnecessary use of http::formatQuery - it is used to format request parameters and it takes an even number of arguments - a list of key value pairs (you're giving it just 1 argument). Because of this it gives an error (Incorrect number of arguments, must be an even number.) and cannot make a http request at all. It is not needed in this case.
Find this line:

Code: Select all

set query [http::formatQuery [regsub -all -- {\s} $term "_"]]
and make it look like this:

Code: Select all

set query [regsub -all -- {\s} $term "_"]
2. REGEXP is slow. And your pattern is not optimized.
With your current regexp pattern it took ~ 85 million steps to find what it looks for (and it fails in some cases). During the search, 1 CPU core is at 100% load for like 5 seconds (in my case, depends on the CPU). It may be better (and faster) to use [string first...] several times to find stuff from position to position, extract the info with [string range...] and then use simple regexp patterns to filter the content.

You can also try to change your regexp pattern to something simpler.
Just for example changing it to the one below, makes it respond in less than a second, but it may need further refining as it may not work in 100% of the pages on wikipedia.

Code: Select all

variable parse_regexp {<\/table>.*?<p>(.*?)<\/p>}
G
Goga
Halfop
Posts: 83
Joined: Sat Sep 19, 2020 2:12 am

Post by Goga »

Hello Masters.
I have put the given wiki script and got that error in the result.
Error: error flushing "sock7": software caused connection abort
User avatar
Arnold_X-P
Master
Posts: 226
Joined: Mon Oct 30, 2006 12:19 am
Location: DALnet - Trinidad - Beni - Bolivia
Contact:

Post by Arnold_X-P »

try this

Code: Select all

# Requires Tcl 8.5+ and tcllib
# To enable you must .chanset #channel +wiki

package require http
package require htmlparse
package require tls
 http::register https 443 [list tls::socket -tls1 1]

namespace eval wiki {
   variable max_lines 1
   variable max_chars 400
   variable url "https://en.wikipedia.org/wiki/"

   bind pub -|- "!w" wiki::search
   bind pub -|- "!wiki" wiki::search
    bind pub -|- ".w" wiki::search
   bind pub -|- ".wiki" wiki::search

  #variable parse_regexp {(<table class.*?<p>.*?</p>.*?</table>)??.*?<p>(.*?)</p>\n<table id="toc"}
   #variable parse_regexp {(?:</table>)?.*?<p>(.*)((</ul>)|(</p>)).*?((<table id="toc")|(<h2>)|(<table id="disambigbox"))}
  variable parse_regexp {<\/table>.*?<p>(.*?)<\/p>}
   setudef flag wiki
}

proc wiki::fetch {term {url {}}} {
   if {$url != ""} {
      set token [http::geturl $url -timeout 10000]
   } else {
     set query [regsub -all -- {\s} $term "_"]
   set token [http::geturl ${wiki::url}${query} -timeout 10000]
   }
   set data [http::data $token]
   set ncode [http::ncode $token]
   set meta [http::meta $token]
   upvar #0 $token state
   set fetched_url $state(url)
    http::cleanup $token

   # debug
   putlog "Fetch! term: $term url: $url fetched: $fetched_url"
   set fid [open "w-debug.txt" w]
    puts $fid $data
   close $fid

   # Follow redirects
   if {[regexp -- {^3\d{2}$} $ncode]} {
      return [wiki::fetch $term [dict get $meta Location]]
   }
   if {$ncode != 200} {
      error "HTTP query failed ($ncode): $data: $meta"
   }

   # If page returns list of results, choose the first one and fetch that
   #if {[regexp -- {<p>.*?((may refer to:)|(in one of the following senses:))</p>} $data]} {
   #   regexp -- {<ul>.*?<li>.*? title="(.*?)">.*?</li>} $data -> new_query
   #   return [wiki::fetch $new_query]
   #}

   if {![regexp -- $wiki::parse_regexp $data -> out]} {
      error "Parse error"
   }
   return [list url $fetched_url result [wiki::sanitise $out]]
}

proc wiki::sanitise {raw} {
   set raw [::htmlparse::mapEscapes $raw]
   # Remove some help links
   set raw [regsub -- {<small class="metadata">.*?</small>} $raw ""]
   set raw [regsub -all -- {<(.*?)>} $raw ""]
   set raw [regsub -all -- {\[.*?\]} $raw ""]
   set raw [regsub -all -- {\n} $raw " "]
   return $raw
}

proc wiki::search {nick uhost hand chan argv} {
   if {![channel get $chan wiki]} { return }
   if {[string length $argv] == 0} {
	  puthelp "PRIVMSG $chan :Please provide a term."
      return
   }
   set argv [string trim $argv]
   # Upper case first character
   set argv [string toupper [string index $argv 0]][string range $argv 1 end]
   if {[catch {wiki::fetch $argv} data]} {
	  puthelp "PRIVMSG $chan :Error: $data"
      return
   }
   foreach line [wiki::split_line $wiki::max_chars [dict get $data result]] {
      if {[incr count] > $wiki::max_lines} {
		puthelp "PRIVMSG $chan :Output truncated. [dict get $data url]"
         break
      }
	  putserv [encoding convertfrom utf-8 "PRIVMSG $chan :$line"]
   }
}

# by fedex
proc wiki::split_line {max str} {
   set last [expr {[string length $str] -1}]
   set start 0
   set end [expr {$max -1}]
   set lines []
   while {$start <= $last} {
      if {$last >= $end} {
         set end [string last { } $str $end]
      }
      lappend lines [string trim [string range $str $start $end]]
      set start $end
      set end [expr {$start + $max}]
   }
   return $lines
}

putlog "wiki.tcl loaded"
.:an ideal world:. www.geocities.ws/chateo/yo.htm
my programming place /server ix.scay.net:7005
Post Reply