(require 'random)
(require 'posix-time)
(require 'sort)
(yreq "Utilities/yenta-utils")

(define *interests:available* #t)
(define *interests:in-use* #f)
; This is the lock for keeping interest-finding and interest use off of each
; others' backs. These cover the database and the interest information.
; This does *not* cover *interests:undone*

(def-yenta-param *interests:filename-reject-regexp* "~$|^#.+#$" ; Reject Emacs backup and autosave files.
  "Filename-rejection regexp"
  "Regexp dictating which filenames (not entire pathnames!) we'll reject when scanning documents.  Any directory whose name matches will also be ignored."
  vars:->regexp
  (settable))

;;; Returns a table of the interests to be sent out; for each, the caddr should
;;; be offered; if the car is not #f, it is the index of the cluster cache to
;;; add to, if so, the cadr is the contact info for the source of the rumor.
;;; this contact info includes, at the beginning, the number you should use
;;; for the interest when you talk to the person.
;;; for-yid is the id of the yenta we're making the table for-- we don't want
;;; to send rumors we've gotten from this yenta, clusters this yenta's in,
;;; etc.
(define (interests:make-interest-table attestations for-yid) #f)

;;; Returns a reply of the form:
;;; (goodness index)
;;; based on the cluster-cache of the best match from the interests in the
;;; database, if that interest is above *interests:match-threshold*.
(define (interests:match-interest-with-database document) #f)

;;; Adds the rumor to the rumor cache, possibly kicking out other rumors. Info
;;; is the contact info for the person we got this from. interest-ID is the
;;; number the other end gave us for the interest.
(define (interests:add-rumor document interest-ID info) #f)

;;; Coordinates stuff to contact the person we heard a rumor from.
;;; heard-from-and-id is the contact info for the yenta we should talk to,
;;; with the interest ID to present consed to the beginning. interested is
;;; the contact info for the interested party.
(define (interests:tell-source heard-from-and-id interested) #f)

;;; Figures out what to do about a received referral-- follow it up ourself,
;;; or past it on to the person we heard the rumor from.
(define (interests:handle-referral iid contact-info) #f)

;;; Adds the specified yenta to the cluster with the specified index, subject
;;; to the rules on which get kept when there are many contacts.
;;; if similarity is provided, this yenta may be a candidate for introduction.
(define (interests:add-to-cluster-cache index yid similarity) #f)

;;; Returns a list of pairs of the yids in the specified interest and the
;;; similarities, in order of best similarity. Note that this is in the
;;; judgement of the other yentas, i.e., these are the people most likely
;;; to think we are interesting, not the people we are most likely to think
;;; are interesting.
(define (interests:introduction-list index) #f)

;;; Returns the Yenta IDs of the Yentas in the cluster cache for the specified
;;; interest.
(define (interests:get-cluster-yids index) #f)

;;; Returns #t if the interest has been added, or #f if it will be when
;;; interest-clustering is done.
(define (interests:add-interest doc) #f)

;;; Returns #t if acquisition was in progress and more filenames were added,
;;; #f if acquisition was just started.
(define interests:acquire-docs 
  (let ((files '()))
    (lambda (filename)
      (set! filename (expand-dir filename)) ; +++ New.  Hope this does what I think it'll do.  --- Foner.
      (set! files (append (filenames-recursive filename *interests:filename-reject-regexp*) files)) ; %%% THIS NEEDS TO YIELD IN FILENAMES-RECURSIVE!
      (cond ((scheduler:task-exists? "Document Finder")
	     #t)
	    (t
	     (scheduler:add-once-task! "Document Finder" 3 scheduler:always
		(lambda ()
		  (let ((num-found 0))
		    (scheduler:simple-do
		     ((null? files)
		      (interests:find-interests!)
		      (inc! *ctr:reclusters*)
		      (ui:add-news-item "interests.html" 
					"Interests:Single Update"
					(format nil "~A document~:P found in ~A~
                                                     ~:[; now finding interests~;, so nothing to look at~]."
						num-found filename
						(zero? num-found))))
		     (let ((before (compare:collection-length
				    *interests:undone*)))
		       (format-debug 20 "Acquiring ~S~&" (car files))
		       (inc! *ctr:docs-scanned*)
		       (compare:acquire-document *interests:undone*
						 (car files))
		       (format-debug 20 "Done acquiring ~S~&" (car files))
		       (set! num-found (+ num-found 
					  (- (compare:collection-length
					      *interests:undone*)
					     before))))
		     (set! files (cdr files))))))
	     #f)))))

(define (interests:reschedule-auto-search)
  (unless *interests:last-collect*
    (interests:run-auto-search))
  (let ((next-search (+ (* *interests:collect-freq* 86400)
			*interests:last-collect*)))
    (scheduler:remove-task! "interests:auto-search")
    (when (> *interests:collect-freq* 0)
      (scheduler:add-at-task! "interests:auto-search" 1 next-search
			      interests:run-auto-search))))

(define (interests:run-auto-search)
  (set! *interests:last-collect* (current-time))
  (inc! *ctr:rescans*)
  (for-each interests:acquire-docs
	    *interests:search-dirs*)
  (interests:reschedule-auto-search))

(define (interests:run-auto-search-task)
  (let ((taskname "Auto search task"))
    (unless (scheduler:task-exists? taskname) ; Don't start a second one if we're still running.
      (scheduler:add-timeout-task!
	taskname 1 1			; Wait a second before starting, just in case.  [%%% Cleaner would be to wait until no httpd conns present.]
	interests:run-auto-search))))

(define (interests:description index)
  (let ((name (vector-ref (list-ref *interests:information* index) 0)))
    (if (= (string-length name) 0)
	(compare:show-document (compare:centroid index) 5)
	name)))

;;;; Giving news to the user about clusters we've just joined, and other Yentas to be introduced to.

;;; If we have just found a a first other Yenta for one of our interests
;;; (e.g., we have joined a cluster, even though it might consist of only
;;; one other Yenta so far), tell the news system about it, so the user has
;;; some idea.  It makes Yenta look much more responsive.
(define (interests:interest-now-has-a-cluster index) ; %%% should put a counter here!
  (let ((user-chosen-name (vector-ref (list-ref *interests:information* index) 0))
	(top-words (compare:show-document (compare:centroid index) 5))
	(url (format nil "interests-contact.html?~A" index)))
    (let ((announcement
	   (format nil "Your Yenta has just found another cluster that shares an <a href=\"~A\">interest of yours</a>.~&~
                        The interest~:['s~*~; is named <i>~A</i>, and its~] top few words are <i>~A</i>."
		   url
		   (not (equal? user-chosen-name ""))
		   user-chosen-name
		   top-words)))
      (ui:add-news-item "interests.html" "Cluster newly joined" announcement))))

(def-yenta-param *interests:introduction-threshold* 0.7
  "Introduction matching threshold"
  "This determines how much a prospective match has to be similar to one of your own interests for me to decide that they should be considered for an introduction.  It's a value between 0 and 1, inclusive."
  vars:->[0-1]
  (matching settable))

(def-yenta-param *interests:introduction-throttle* 0
  "Introduction throttle"
  "If there are likely candidates for introductions, this determines how often they will be suggested to you.  This can keep you from getting a huge pile of suggested introductions if you happen to join a group where many people match you closely.  It is a value in hours, and it may also be a fractional value, or zero.  Yenta waits at least this many hours in between each possible suggestion, and always suggests the best match to you first out of the collection of possible matches it's got.  If you set this to zero, Yenta will suggest all possible introductions within a few minutes of when they have been noticed, no matter how many that may be.  Yenta will only notice changes to this number <i>after</i> is has expired, so if it's set to a week and then you set it to a minute, that first timeout of a week will have to expire first."
  ;; %%% The cheesy dodge about zero above is because I don't want to include all the special-case logic to
  ;; %%% only have a periodic function iff the number is -not- zero.  Similarly, the wait-until-it-expires
  ;; %%% business is 'cause I don't want to have to terminate the task and restart it when this gets changed.
  ;; %%% In both cases, the elegant way out from the UI perspective would be to have a hook that runs arbitrary
  ;; %%% code when the value gets changed; perhaps I can do that from the checker function below.  Later.
  vars:->nonneg-number
  (matching settable))

;;; +++
;;; The following two variables use "entries" which consist of (YID similarity interest-index timestamp) tuples.
;;; The timestamp is the result of a call to (current-time).

;;; A list of tuples indicating Yenta's we think would be good to be introduced to.
;;; The list is always kept sorted such that the greatest similarity is first.
(def-yenta-var *interests:pending-introductions* '())

;;; A list of tuples which have already been suggested for introductions.
;;; Once we have mentioned a Yenta for an introduction, it's added to this table, which will keep it from
;;; being re-added to *interests:pending-introductions*, and hence we'll never mention them again.
;;;
;;; %%% Note that this particular logic also means that we'll never suggest an introduction to a Yenta
;;; %%% for a second time, even if it is newly found to match us on some -other- interest, and that we
;;; %%% won't tell the user that that other Yenta matches on multiple interests, even if we know that
;;; %%% the first time.  At some point I should probably fix this, but I imagine it's a fairly marginal
;;; %%% case in real use.
;;;
;;; %%% Note that, at the moment, this (a) grows monotonically, and (b) will get increasingly slow to search.
;;; %%% We should prune this, and also turn it into a hashtable.  Doing the latter means that the pruning logic
;;; %%% will also depend on our storing a timestamp or something into the table, since otherwise we'd never know
;;; %%% which to prune since hashtable elements have unpredictable order.  Another idea might be to keep the table
;;; %%% to some maximum length, composed only of the n -best- values, based on similarities; in that case, we'd need
;;; %%% the similarities.  To enable future expansion without too much pain, we keep the similarities and timestamps
;;; %%% around.
(def-yenta-var *interests:suggested-introductions* '())
;;; ---

;;; This might add an entry to the list of Yentas we might like to be introduced to.
(define (interests:maybe-record-introduction-candidate index similarity yid)
  (when (>= similarity *interests:introduction-threshold*)
    (unless (find-if (lambda (tuple)
		       (string=? yid (car tuple)))
		     *interests:suggested-introductions*)
      (let ((tuple (list yid similarity index (current-time))))
	(push! tuple *interests:pending-introductions*) ; %%% should put a counter here!
	(push! tuple *interests:suggested-introductions*))
      (set! *interests:pending-introductions*
	    (sort! *interests:pending-introductions*
		   (lambda (tuple1 tuple2)
		     (< (cadr tuple1) (cadr tuple2))))))))

;;; This should be called periodically to attempt an introduction.  It returns #t
;;; if there was an introduction to do, and #f otherwise; this allows it to be called
;;; in a tight loop in the special case that *interests:introduction-throttle* is 0.
;;; %%% At the moment, we don't call this in such a loop, btw.
(define (interests:make-an-introduction)
  (cond ((null? *interests:pending-introductions*)
	 #f)				; Can't use "when", 'cause that doesn't return #f if clause false...
	(t
	 (let* ((tuple (car *interests:pending-introductions*))	; %%% should put a counter here!
		(yid (nth 0 tuple))	; Oh for destructuring-bind!
		(similarity (nth 1 tuple))
		(index (nth 2 tuple))
		(handle (identity:get-handle yid))
		(user-chosen-name (vector-ref (list-ref *interests:information* index) 0))
		(top-words (compare:show-document (compare:centroid index) 5))
		(interest-url (format nil "interests-contact.html?~A" index))
		(announcement
		 (format nil "Another Yenta has been noticed that is a very close match to one of <a href=\"~A\">your interests</a>.~&
                              Perhaps you should introduce yourself by ~Asending it a message</a>.~&~
                              The other Yenta's handle is <i>~A</i>.~&~
                              The interest~:['s~*~; is named <i>~A</i>, and its~] top few words are <i>~A</i>.~&~
                              The match was ~A, compared to your current threshold of ~A.~&~
                              If you'd like to change how often introductions are suggested, try adjusting
                              \"Introduction matching threshold\" or \"Introduction throttle\"~&~
                              on the <a href=\"params.html\">tuning</a> page.~&"
			 interest-url
			 (reduce string-append (identity:compose-link (ui:bytes->hex yid)))
			 handle
			 (not (equal? user-chosen-name ""))
			 user-chosen-name
			 top-words
			 (/ (round (* similarity 1000)) 1000.0)	; Round to 3 sigfigs.  More is pointless.
			 *interests:introduction-threshold*)))
	   (inc! *ctr:intros-i-requested*)
	   (ui:add-news-item "interests.html" "Maybe you should introduce yourself" announcement)
	   (set! *interests:pending-introductions* (cdr *interests:pending-introductions*))
	   #t))))

(define *interests:intro-checker-taskname* "Maybe make an introduction")

;;; Sets up the task that periodically tries to do an introduction.
(define (interests:schedule-intro-checker)
  ;; First, try doing an introduction.
  (interests:make-an-introduction)
  ;; Next, schedule ourselves to run again.  This logic is stolen from stats:log-to-server.
  ;; It's done this way, instead of using a periodic task, because otherwise changes
  ;; in the timeout (e.g., because the user changed it, or because we're trying to dynamically adjust
  ;; the logging interval based on advice from the server) won't be noticed without also knowing to
  ;; kill and restart the task.  While this would be easy enough for the latter case, the former would
  ;; require special-casing the UI.
  (let ((timeout (min 60 (abs (* *interests:introduction-throttle* 3600))))) ; hours -> seconds, but never less than 1 minute, and never negative!
    (scheduler:remove-task! *interests:intro-checker-taskname*)
    (scheduler:add-timeout-task!
      *interests:intro-checker-taskname* 3
      timeout
      interests:schedule-intro-checker)))

;;;; Main routines.

(let ()
;;; *iy:cluster-cache* is of the form:
;;; ((<yid> or (<yid> . <similarity, according to other end>) ...) ...)
;;; outer list is in order to match the database, inner list is in no
;;; particular order.

;;; *interests:information* is of the form:
;;; (#(<name> <number of contacts> <is-interesting?> <iid>) ...)
;;; list is in order to match the database.

(define (introduction-list index)
  (sort! (filter pair? (list-ref *iy:cluster-cache* index))
	 (lambda (a b)
	   (< (cdr a) (cdr b)))))

(define (get-cluster-yids index)
  (map (lambda (item) (if (pair? item) (car item) item))
       (list-ref *iy:cluster-cache* index)))

(define (add-to-cluster-cache index yid similarity)
  (let* ((new #t)
	 (lst (filter (lambda (item)
			(or (and (not (pair? item))
				 (not (equal? item yid)))
			    (and (pair? item)
				 (not (equal? (car item) yid)))
			    (begin
			      (set! new #f)
			      #f)))
		      (list-ref *iy:cluster-cache* index))))
    (format-debug 0 "~&ATCC:  index = ~S, yid = ~S, similarity = ~S~&new = ~S, old II = ~S, lst = ~S~&"
		  index yid similarity new (vector-ref (list-ref *interests:information* index) 1) lst)
    (when new
      (let ((size (vector-ref (list-ref *interests:information* index) 1)))
	(when (zero? size)
	  (interests:interest-now-has-a-cluster index))
	(vector-set! (list-ref *interests:information* index) 1
		     (+ 1 size))))
    (set! lst (cons (if (number? similarity)
			(cons yid similarity)
			yid)
		    lst))
    (set! *iy:cluster-cache* (set-list-ref *iy:cluster-cache* index lst))
    (interests:maybe-record-introduction-candidate index similarity yid)))

(define (do-add document)
  (let ((index (compare:insert-document document)))
    (set! *interests:information* (set-list-ref *interests:information*
						index 
						(vector "" 0 #t
							(ssl:sha1-fingerprint
							 (compare:show-document
							  document
							  5)))))
    (set! *iy:cluster-cache* (set-list-ref *iy:cluster-cache*
					   index (list *local-yenta-id*)))))

(define (add-interest doc)
  (cond (*interests:clustering?*
	 (scheduler:split
	     (interests:stop-clustering)
	   (scheduler:when (interests:done-clustering?)
	     (do-add doc)))
	 #f)
	(t
	 (do-add doc)
	 #t)))

(define (match-interest-with-database document)
  (let ((match (compare:match document))
	(best 0)
	(bv 0.00))
    (do ((i 0 (+ i 1)))
	((>= i (compare:index-limit))
	 (format-debug 0 "~&bv = ~S; best = ~S~&" bv best)
	 (list bv best))
      (when (and (compare:index-valid? i)
		 (vector-ref (list-ref *interests:information* i) 2)
		 (> (array-ref match i) bv))
	(set! best i)
	(set! bv (array-ref match i))))))

; (define (match-table-with-database int)
;  (filter (lambda (item)
;	    (> (caddr item) *interests:match-threshold*))
;	  (map (lambda (item)
;		 (cons (car item) 
;		       (analyze (compare:import-document (cadr item)))))
;	       lst)))

(define (add-rumor doc iid info)
  (if (not ((hash-inquirer equal?) *interests:number-back* iid))
      (begin
	(compare:add-document doc *iy:rumor-cache*)
	((hash-associator equal?) *interests:number-back* iid info)
	(set! *iy:rumor-info* 
	      (append *iy:rumor-info* (list (cons iid info)))))))

(define (make-rumor-table for-yid)
  (do ((table '() (if (and (car (list-ref *iy:rumor-info* i))
			   (not (equal? for-yid
					(list-ref *iy:rumor-info* i))))
		      (cons (list
			     #f
			     (list-ref *iy:rumor-info* i)
			     (compare:collection-ref *iy:rumor-cache* i)
			     (car (list-ref *iy:rumor-info* i)))
			    table)
		      table))
       (i 0 (+ i 1)))
      ((= i (compare:collection-length *iy:rumor-cache*)) table)))

;;; %%% Fix this to remove clusters you're not interested in growing and clusters
;;; %%% you don't send based on attestations.
(define (make-own-interest-table attestations for-yid)
  (map (lambda (index)
	 (list index #f (compare:centroid index)
	       (vector-ref 
		(list-ref *interests:information* index) 3)))
       (filter (lambda (index)
		 (let ((info 
			(list-ref *interests:information* index)))
		   (and (vector-ref info 2)
			(not (member for-yid 
				     (get-cluster-yids index))))))
	       (compare:indices))))

(define (make-interest-table attestations for-yid)
  (cond ((and (identity:last-seen for-yid)
	      *interests:last-changed*
	      (> (identity:last-seen for-yid) *interests:last-changed*))
	 '())
	(t
	 (let* ((rumors (make-rumor-table for-yid))
		(own (make-own-interest-table attestations for-yid))
		(rumor-size (length rumors))
		(total-size (+ rumor-size (length own))))
	   (do ((table '() (cons (let ((next (random total-size)))
				   (if (>= next rumor-size)
				       (let ((topic 
					      (list-ref own 
							(- next rumor-size))))
					 (set! total-size (- total-size 1))
					 (set! own (delete topic own))
					 topic)
				       (let ((topic (list-ref rumors next)))
					 (set! total-size (- total-size 1))
					 (set! rumor-size (- rumor-size 1))
					 (set! rumors (delete topic rumors))
					 topic)))
				 table)))
	       ((= total-size 0) table))))))

(define (tell-source from-and-id interested)
  (identity:add-referral! (cadddr from-and-id) (list (car from-and-id)
						     interested))
  (iy:contact-yenta (cadddr from-and-id) 'referrals)
  )

(define (handle-referral iid contact-info)
  ;; This is where we determine whether we want to say it was our rumor
  ;; (which we can, if we want), or if we want to pass it back, or if we've
  ;; already forgotten about that rumor.
  (let ((source ((hash-inquirer equal?) *interests:number-back* iid)))
    (if (pair? source)
	(tell-source (cons iid source) contact-info)
	(if source
	    (iy:contact-host (car contact-info))
	    #f))))

(set! interests:introduction-list introduction-list)
(set! interests:get-cluster-yids get-cluster-yids)
(set! interests:add-to-cluster-cache add-to-cluster-cache)
(set! interests:add-interest add-interest)
(set! interests:match-interest-with-database match-interest-with-database)
(set! interests:make-interest-table make-interest-table)
(set! interests:add-rumor add-rumor)
(set! interests:tell-source tell-source)
(set! interests:handle-referral handle-referral)

)
