Searx/ImmortalPoetry

Searx search engine plugin for immortalpoetry.com and its sister sites dikt.org (Norwegian) and svenskadikter.com (Swedish).

File: searx/engines/immortalpoetry.py

"""
 @website     https://immortalpoetry.com/
 @provide-api yes (http://www.mediawiki.org/wiki/API:Search)

 @using-api   yes
 @results     JSON
 @stable      yes
 @parse       url, title, content

 @todo        change search URL depending on language
"""
from json import loads
from string import Formatter
from lxml.html import fromstring
from searx.url_utils import urlencode, quote
from searx.utils import html_to_text

# hmm

# Engine configuration (module-level settings for this engine).
categories = ['general']
language_support = True
paging = True
number_of_results = 5  # page size; used both as srlimit and to compute sroffset
search_type = 'title'  # possible values: title, text, nearmatch
supported_languages = ['en', 'nb', 'no', 'sv']

# NOTE(review): request() builds its URL from a per-language site, not from
# base_url, so nothing visible in this file reads this value -- confirm
# whether anything outside this file depends on it.
base_url = 'https://linuxreviews.org/'

# MediaWiki search query, appended to the selected site's root URL.
# {query}, {offset} and {limit} are filled in by request().
# The '&srwhat={searchtype}' parameter is currently disabled.
search_postfix = (
    'w/api.php?action=query'
    '&list=search'
    '&{query}'
    '&format=json'
    '&sroffset={offset}'
    '&srlimit={limit}'
)

# get first meaningful paragraph
# try to avoid [[File:images]] and [[links]]
# TODO: Just filter [ and ] and remove if it's an image
def extract_first_paragraph(content):
    """Return the first bracket-free text fragment of at least 30 characters.

    The text is cut at every ']' and every '[' so that wiki markup such as
    [[File:...]] or [[links]] ends up in short fragments that are skipped.

    :param content: plain text to scan (may be empty or None)
    :returns: the first fragment with 30 or more characters, or None when
              no such fragment is found within the first six ']'-separated
              segments
    """
    if not content:
        return None

    failed_attempts = 0
    for segment in content.split(']'):
        for fragment in segment.split('['):
            if len(fragment) >= 30:
                # Return right away: continuing the outer loop would let a
                # later fragment replace this one and would count this
                # successful segment as a failed attempt.
                return fragment

        # Only segments without a usable fragment count as failures.
        failed_attempts += 1
        if failed_attempts > 5:
            return None
    return None

# do search-request
def request(query, params):
    """Fill in the search URL for the site matching the requested language.

    The site is chosen from the primary language subtag, so 'sv' and
    'sv-SE' both select the Swedish site; 'all' falls back to English.

    :param query: search terms entered by the user
    :param params: request parameters; 'pageno' and 'language' are read,
                   'url' and 'site_url' are written
    :returns: the updated ``params``, or None (leaving ``params`` untouched)
              when no site exists for the requested language
    """
    # One site per language; both Norwegian codes listed in
    # supported_languages map to the same site.
    sites = {
        'en': 'immortalpoetry.com',
        'nb': 'dikt.org',
        'no': 'dikt.org',
        'sv': 'svenskadikter.com',
    }

    language = params['language']
    if language == 'all':
        primary = 'en'
    else:
        # 'nb-NO' / 'nb_NO' / 'nb' -> 'nb'
        primary = language.replace('_', '-').split('-')[0].lower()

    site = sites.get(primary)
    if site is None:
        return None
    site_url = 'https://' + site + '/'

    offset = (params['pageno'] - 1) * number_of_results

    # searchtype is only consumed if the '&srwhat={searchtype}' part of
    # search_postfix is enabled; str.format ignores unused keyword arguments.
    string_args = dict(query=urlencode({'srsearch': query}),
                       offset=offset,
                       limit=number_of_results,
                       searchtype=search_type)

    params['url'] = (site_url + search_postfix).format(**string_args)
    # response() reads this back to build the result links
    params['site_url'] = site_url
    return params


# get response from search-request
def response(resp):
    """Turn the JSON search reply into a list of searx results.

    :param resp: HTTP response; ``resp.text`` holds the JSON body and
                 ``resp.search_params['site_url']`` the site root that
                 request() stored
    :returns: list of dicts with 'url', 'title' and 'content' keys; empty
              when the reply contains no search hits
    """
    search_results = loads(resp.text)
    site_url = resp.search_params['site_url']

    # return empty array if there are no results
    hits = search_results.get('query', {}).get('search')
    if not hits:
        return []

    results = []
    for result in hits:
        # Read the snippet once and tolerate a missing or null value, so the
        # redirect check and the summary extraction see the same text.
        snippet = result.get('snippet') or ''
        if snippet.startswith('#REDIRECT'):
            continue

        title = result['title']
        # Result link: site root plus the title with spaces as underscores
        url = site_url + quote(title.replace(' ', '_').encode('utf-8'))

        # May be None when the snippet has no sufficiently long fragment
        summary = extract_first_paragraph(html_to_text(snippet))

        results.append({'url': url,
                        'title': title,
                        'content': summary})

    return results

The plugin can be enabled with a configuration entry such as:

File: searx/settings.yml

  - name : immortalpoetry
    engine : immortalpoetry
    shortcut : ip
    weight : 2
    number_of_results : 5

Searx/ImmortalPoetry

Navigation menu

Page actions

Page actions

Personal tools

Search

Navigation

fun free games

software benchmarks

educational videos

Comparisons

Great software

for beginners

cheat sheets

HOWTO

한국어

confused?

feed reader feeds

try your luck

logs

Tools