61 lines
1.8 KiB
Hy
61 lines
1.8 KiB
Hy
#!/usr/bin/env hy
;; Walks del.icio.us bookmark listing pages, starting from the URL given on
;; the command line, and prints every bookmark as an org-mode entry on stdout.
;;
;; The interpreter is looked up through `env` so the script does not depend on
;; hy being installed at one fixed path.

;; Version string of this script.
(def *version* "0.0.2")
|
|
|
|
(import os re sys html2text
|
|
requests
|
|
[slugify [slugify]]
|
|
[datetime [datetime]]
|
|
[bs4 [BeautifulSoup]]
|
|
[xml.etree.ElementTree :as ET])
|
|
|
|
;; Compiled pattern for the "saved by USER on DATE" text of a bookmark's info
;; block; group 1 captures everything after "on " up to the end of the line.
;; Written as a raw string so the regex escape \S reaches `re` unchanged
;; instead of relying on the string reader passing an unknown escape through.
(def search-date (re.compile r"saved by \S+ on (.*)"))
|
|
|
|
(defn extract-date [infoblock]
  "Pull the save date out of `infoblock` and return it as an org timestamp body.

  `infoblock` is searched with the module-level `search-date` pattern; the
  captured text is parsed with the format '%B %d, %Y' and re-rendered as
  '%Y-%m-%d %a 00:00' (the time of day is a fixed literal, not parsed).

  Raises AttributeError when the pattern does not match (the search result is
  None) and ValueError when the captured text does not fit the date format."
  (-> (.search search-date infoblock)
      (.group 1)
      (datetime.strptime "%B %d, %Y")
      (.strftime "%Y-%m-%d %a 00:00")))
|
|
|
|
(defn extract-tags [tags]
  "Return the `text` attribute of each element that `(.find-all tags \"li\")` yields.

  The result is whatever `map` produces, so callers that need a concrete
  sequence should wrap it in `list`."
  (->> (.find-all tags "li")
       (map (fn [entry] (. entry text)))))
|
|
|
|
(defn get-details [article]
  "Print one bookmark as an org-mode entry on stdout.

  `article` is the parsed element of a single bookmark. Read from it:
    - the anchor inside its first h3, whose text becomes the link title;
    - the div with class articleInfoPan, whose p/a text becomes the link
      target and whose full text is handed to `extract-date`;
    - the div with class thumbTBriefTxt, whose li texts become the tags and
      whose p texts, joined with single spaces, become the comment.

  Output: a level-2 heading holding the link and a :tag1:tag2: suffix (empty
  when there are no tags), a property drawer with the creation timestamp, a
  blank line, and -- only when the comment is non-empty -- the comment
  followed by another blank line."
  (let [anchor (. (.find article "h3") a)
        title (. anchor text)
        info (.find article "div" :class "articleInfoPan")
        url (-> info (. p) (. a) (. text))
        created-date (extract-date (. info text))
        desc (.find article "div" :class "thumbTBriefTxt")
        rawtags (list (extract-tags desc))
        comment (.join " " (map (fn [i] (. i text)) (.find-all desc "p")))
        tags (if (> (len rawtags) 0) (+ ":" (.join ":" rawtags) ":") "")]
    ;; An org link is [[target][description]]; the outer bracket pair must be
    ;; closed or org does not recognise the heading text as a link.
    (print (.format "** [[{}][{}]] {}" url title tags))
    (print ":PROPERTIES:")
    (print (.format ":created: [{}]" created-date))
    ;; A property drawer is terminated by ":END:", including the trailing colon.
    (print ":END:")
    (print "")
    (when (> (len comment) 0)
      (print comment)
      (print ""))))
|
|
|
|
(defn process-page [html]
  "Print every bookmark found in `html` and return the URL of the next page.

  `html` is parsed with BeautifulSoup using the lxml parser. Each div with
  class articleThumbBlockOuter is passed to `get-details`, which prints it.
  Afterwards the first anchor whose aria-label is Next is looked up: when it
  exists, its href appended to https://del.icio.us is returned; otherwise the
  result is None."
  (let [soup (BeautifulSoup html "lxml")]
    (for [entry (.find-all soup "div" :class "articleThumbBlockOuter")]
      (get-details entry))
    (let [next-link (.find soup "a" {"aria-label" "Next"})]
      ;; `when` yields None if there is no Next link, signalling the last page.
      (when next-link
        (+ "https://del.icio.us" (get next-link "href"))))))
|
|
|
|
(defn process-request [url]
  "Download `url`, process the page, and return what `process-page` returns.

  The request is made with `requests.get` and a 30 second timeout, so a
  stalled server cannot hang the script forever. `raise-for-status` turns HTTP
  error responses into an exception instead of letting an error page be
  parsed as if it were a bookmark listing.

  Raises whatever `requests.get` and `raise-for-status` raise (connection
  errors, timeouts, HTTP error statuses)."
  (let [req (requests.get url :timeout 30)]
    (.raise-for-status req)
    (process-page (. req text))))
|
|
|
|
;; Entry point. `args` is the argument vector; element 1 is the URL of the
;; first page to process. Pages are followed for as long as `process-request`
;; returns a next URL.
;;
;; A missing URL argument is reported on stderr instead of surfacing as an
;; IndexError from `(get args 1)`, and any error raised while fetching or
;; parsing is reported on stderr as well. Both cases return 1 so the process
;; exits with a non-zero status.
(defmain [&rest args]
  (if (< (len args) 2)
    (do
      (.write sys.stderr "usage: pass the URL of the first bookmarks page as the only argument\n")
      1)
    (try
      (let [nexturl (get args 1)]
        (while nexturl
          (setv nexturl (process-request nexturl))))
      (except [e Exception]
        (.write sys.stderr (.format "error: {}\n" e))
        1))))
|
|
|