Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ If you are using conda, you can install from the `conda-forge` channel:

wordcloud depends on `numpy`, `pillow`, and `matplotlib`.

For **correct Arabic / Hebrew / Syriac** (RTL) shaping in the Python API, use a
Pillow build with **libraqm** support. You can verify this with
`PIL.features.check("raqm")`, which returns `True` when libraqm is available. The examples under
`examples/` that set `text_direction` and `text_language` assume such a build.

If there are no wheels available for your version of python, installing the
package requires having a C compiler set up. Before installing a compiler, report
an issue describing the version of python and operating system being used.
Expand All @@ -43,10 +47,13 @@ Or run [examples/masked.py][masked] to see more options. A sample output is:
Getting fancy with some colors:
![Parrot with rainbow colors](examples/parrot_new.png)

Generating wordclouds for Arabic:
Generating wordclouds for Arabic text (native RTL rendering via Pillow+libraqm; no `arabic_reshaper` needed). See [examples/arabic.py](examples/arabic.py) and [examples/arabicwords.txt](examples/arabicwords.txt). Sample output:

![Arabic wordcloud](examples/arabic_example.png)

![Arabic wordlcloud](examples/arabic_example.png)
Generating wordclouds for Kurdish Sorani (ckb), which is written in the Arabic script: see [examples/ku_ckb_wordcloud.py](examples/ku_ckb_wordcloud.py) and [examples/ku_ckb_wordcloud.txt](examples/ku_ckb_wordcloud.txt). Sample output:

![Kurdish Sorani (ckb) word cloud](examples/ku_ckb_wordcloud.png)

## Command-line usage

Expand All @@ -60,7 +67,7 @@ If you're dealing with PDF files, then `pdftotext`, included by default with man

In the previous example, the `-` argument orders `pdftotext` to write the resulting text to stdout, which is then piped to the stdin of `wordcloud_cli.py`.

Use `wordcloud_cli --help` so see all available options.
Use `wordcloud_cli --help` to see all available options.

[blog-post]: http://peekaboo-vision.blogspot.de/2012/11/a-wordcloud-in-python.html
[website]: http://amueller.github.io/word_cloud/
Expand Down
60 changes: 39 additions & 21 deletions examples/arabic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,50 @@
===============
Generating a wordcloud from Arabic text

Dependencies:
- bidi.algorithm
- arabic_reshaper

Dependencies installation:
pip install python-bidi arabic_reshape
This example uses native WordCloud RTL support powered by Pillow+libraqm.
"""

import os
import codecs
from pathlib import Path

from wordcloud import WordCloud
import arabic_reshaper
from bidi.algorithm import get_display

# get data directory (using getcwd() is needed to support running example in generated IPython notebook)
d = os.path.dirname(__file__) if "__file__" in locals() else os.getcwd()
# Directory that holds this example and its data/font files.
# Generated IPython notebooks do not define __file__, so fall back to the
# current working directory there instead of failing with a NameError.
HERE = (
    Path(__file__).resolve().parent if "__file__" in globals() else Path.cwd()
)

# Read the whole text.
f = codecs.open(os.path.join(d, 'arabicwords.txt'), 'r', 'utf-8')
ARABIC_TEXT = HERE / "arabicwords.txt"
NOTO_NASKH_ARABIC = HERE / "fonts" / "NotoNaskhArabic" / "NotoNaskhArabic-Regular.ttf"
# Fallback if the Noto font is missing (Estedad is the font used by ku_ckb_wordcloud.py)
ESTEDAD_FONT = HERE / "fonts" / "Estedad-v8.5" / "Estedad-Regular.ttf"

# Make text readable for a non-Arabic library like wordcloud
text = arabic_reshaper.reshape(f.read())
text = get_display(text)

# Generate a word cloud image
wordcloud = WordCloud(font_path='fonts/NotoNaskhArabic/NotoNaskhArabic-Regular.ttf').generate(text)
def pick_font():
    """Return the first bundled font file that exists, as a string path.

    The fonts are tried in order of preference: Noto Naskh Arabic first,
    then Estedad. ``None`` is returned when neither path is a regular file.
    """
    search_order = (NOTO_NASKH_ARABIC, ESTEDAD_FONT)
    existing = (str(font) for font in search_order if font.is_file())
    return next(existing, None)


# Export to an image
wordcloud.to_file("arabic_example.png")
# Load the word list (arabicwords.txt in this script's directory) as UTF-8.
text = ARABIC_TEXT.read_text(encoding="utf-8")

# Stop with a readable message when none of the candidate fonts is present.
font_path = pick_font()
if font_path is None:
    raise SystemExit(
        "No Arabic-capable font found under examples/fonts. "
        f"Expected Noto at {NOTO_NASKH_ARABIC}"
    )

# Generate a word cloud image; the raw text is passed in without any
# reshaping or bidi pre-processing.
cloud_options = {
    "font_path": font_path,
    "text_direction": "auto",
    "text_language": "ar",
}
wordcloud = WordCloud(**cloud_options).generate(text)

# Write the image into this script's directory and report where it went.
out = HERE / "arabic_example.png"
wordcloud.to_file(str(out))
print(f"Wrote {out}")
Binary file modified examples/arabic_example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added examples/fonts/Estedad-v8.5/Estedad-Regular.ttf
Binary file not shown.
93 changes: 93 additions & 0 deletions examples/fonts/Estedad-v8.5/OFL.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
Copyright 2022 The Estedad Project Authors (https://github.com/aminabedi68/Estedad)

This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at:
https://openfontlicense.org


-----------------------------------------------------------
SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007
-----------------------------------------------------------

PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font creation
efforts of academic and linguistic communities, and to provide a free and
open framework in which fonts may be shared and improved in partnership
with others.

The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded,
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply
to any document created using the fonts or their derivatives.

DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.

"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).

"Original Version" refers to the collection of Font Software components as
distributed by the Copyright Holder(s).

"Modified Version" refers to any derivative made by adding to, deleting,
or substituting -- in part or in whole -- any of the components of the
Original Version, by changing formats or by porting the Font Software to a
new environment.

"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.

PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed, modify,
redistribute, and sell modified and unmodified copies of the Font
Software, subject to the following conditions:

1) Neither the Font Software nor any of its individual components,
in Original or Modified Versions, may be sold by itself.

2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.

3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the corresponding
Copyright Holder. This restriction only applies to the primary font name as
presented to the users.

4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.

5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created
using the Font Software.

TERMINATION
This license becomes null and void if any of the above conditions are
not met.

DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.
24 changes: 24 additions & 0 deletions examples/fonts/Estedad-v8.5/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Estedad
![sample](https://github.com/aminabedi68/Estedad/blob/master/documentation/Estedad-sample.png)
Estedad (`/~este~dAd/`, meaning "talent" in Persian) is an Arabic-Latin sans-serif typeface in 9 standard weights and a variable version (`wght`: weight / `KSHD`: Arabic simple kashida). Estedad has wide codepoint range support for most Arabic and Latin languages.
<br>The design of the type is simple, smooth, compact, legible, and low contrast, with the lowest optical size (a bit higher in bold and above weights), and it is optimized for web-like environments.
<br>Estedad supports small caps, simple fractions, tabular and old-style numbers, and both decomposed (limited to basic a-z A-Z letters) and precomposed letter forms for Latin, as well as semi-advanced Quran marking methods for Arabic script.
<br>This typeface was designed in FontForge (in two masters, thin and black). The variable font and static instances are built with fontmake and the help of Python scripts (FontForge Python API and fontTools). The whole project is licensed to be free under the Open Font License v1.1.
<br>The quality of the variable font has been improved following Font Bakery's advice.

## Weights and Axes
Estedad has 9 Weights(Standard weights, 100-Thin to 900-Black) and 2 Axes(Weight[wght]:100-900 // Kashida[KSHD]:100-200)

## Build(Windows):
### requirements:
1-installed <a href="https://github.com/fontforge/fontforge">fontforge</a> with path access in environment variables
<br>2-installed <a href="https://www.python.org/">python</a> and <a href="https://github.com/googlefonts/fontmake">fontmake</a>
### build:
move script folder contents to sources folder and run Build.bat
<br>(FD version of variable font created manually.)
<br>
<br>
![designspace](https://github.com/aminabedi68/Estedad/blob/master/documentation/Estedad-designspace.png)

## weight axis nonlinearity:
The weight axis uses a nonlinear mapping that decreases the distance between the lower instances and increases the distance between the upper instances.
Binary file added examples/ku_ckb_wordcloud.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
41 changes: 41 additions & 0 deletions examples/ku_ckb_wordcloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python
"""
Kurdish word cloud - CKB (RTL) using Estedad from ./fonts/Estedad-v8.5.

No bidi pre-processing needed when Pillow has libraqm support.
"""

from pathlib import Path
from wordcloud import WordCloud

HERE = Path(__file__).resolve().parent

ESTEDAD_FONT = HERE / "fonts" / "Estedad-v8.5" / "Estedad-Regular.ttf"


def main() -> None:
    """Render the Kurdish (ckb) word list to ``ku_ckb_wordcloud.png``.

    Reads ``ku_ckb_wordcloud.txt`` from this script's directory, builds a
    1200x800 word cloud on a white background with the Estedad font, writes
    the PNG into the same directory and prints the output path.

    Raises:
        SystemExit: If the word list or the Estedad font file is missing.
    """
    words_file = HERE / "ku_ckb_wordcloud.txt"
    if not words_file.is_file():
        raise SystemExit(f"Missing {words_file}")
    corpus = words_file.read_text(encoding="utf-8")

    if not ESTEDAD_FONT.is_file():
        raise SystemExit(f"Missing Estedad font at {ESTEDAD_FONT}")

    # NOTE(review): the text is Central Kurdish (ckb) but text_language is
    # "ar" -- confirm whether WordCloud accepts "ckb" and whether that would
    # select better language-specific glyph forms.
    settings = {
        "font_path": str(ESTEDAD_FONT),
        "width": 1200,
        "height": 800,
        "background_color": "white",
        "text_direction": "auto",
        "text_language": "ar",
    }
    cloud = WordCloud(**settings).generate(corpus)

    target = HERE / "ku_ckb_wordcloud.png"
    cloud.to_file(str(target))
    print(f"Wrote {target}")


if __name__ == "__main__":
main()
84 changes: 84 additions & 0 deletions examples/ku_ckb_wordcloud.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
ئازادی
ژن
ژیان
کورد
کوردستان
نیشتمان
وڵات
پێشمەرگە
ڕۆژئاڤا
ڕۆژهەڵات
باکوور
باشور
مەهاباد
سلێمانی
ئامەد
قامیشلوو
کۆبانی
عەفرین
زاخۆ
خانەقین
گوڵاڵە
کرماشان
سنە
بۆکان
ورمێ
سۆرانی
کورمانجی
هەورامان
هەورامی
لوڕستان
ئیلام
لوڕی
کەلهوڕی
زازاکی
مێژوو
هەرێم
ناوەند
سەرچاوە
هێمن
هەژار
نالی
بابان
جزیری
هەرکی
بادینی
کەرکوک
هەولێر
پشدەر
ئالان
مەم
زین
خانی
تاژدین
ماد
وێنە
قەندیل
شەڕڤان
کەزی
شاخ
شاخەوان
هۆژان
پۆتان
فێرکاری
دۆلان
چاند
فەرهەنگ
کولتوور
زانست
مرۆڤ
هونەر
وانە
هەڵبەست
خاڵ
هەنبانە
دایک
منداڵ
باوک
پیرۆز
خاک
ئاڵا
هاوسەنگ
سەرۆک
خاتوون
کۆمەڵگا
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,13 @@ description = "A little word cloud generator"
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.9"
license = "MIT"
dependencies = ["numpy>=1.19.3", "pillow", "matplotlib"]
# Runtime requirements only. Cython is a build-time tool and belongs in
# [build-system].requires, not here. numpy keeps a plain lower bound: an exact
# "==" pin in a library makes it conflict with other packages in the same
# environment.
dependencies = [
    "matplotlib>=1.5.3",
    "numpy>=1.19.3",
    "pillow",
]
dynamic = ["version"]

[project.urls]
Expand Down
Loading