From b148a1925e10e78eb11234ad2eb71afb0cba44ad Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 1 Feb 2018 15:18:14 -0500 Subject: [PATCH 1/4] harden search --- core/search.py | 2 ++ frontend/views/__init__.py | 10 +++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/core/search.py b/core/search.py index d1d23d2b..a2fd24b7 100644 --- a/core/search.py +++ b/core/search.py @@ -60,6 +60,8 @@ def gluejar_search(q, user_ip='69.243.24.29', page=1): def googlebooks_search(q, user_ip, page): + if len(q) < 2 or len(q) > 2000: + return {} # XXX: need to pass IP address of user in from the frontend headers = {'X-Forwarded-For': user_ip} start = (page - 1) * 10 diff --git a/frontend/views/__init__.py b/frontend/views/__init__.py index 16fcda38..ed6873bf 100755 --- a/frontend/views/__init__.py +++ b/frontend/views/__init__.py @@ -528,7 +528,11 @@ BAD_ROBOTS = [u'memoryBot'] def is_bad_robot(request): user_agent = request.META.get('HTTP_USER_AGENT', '') for robot in BAD_ROBOTS: - if robot in user_agent: + try: + if robot in user_agent: + return True + except UnicodeDecodeError: + # user agent is sending illegal header return True return False @@ -1898,7 +1902,7 @@ class ManageAccount(FormView): return render(self.request, self.template_name, self.get_context_data()) def search(request): - q = request.GET.get('q', '') + q = request.GET.get('q', '').strip() ty = request.GET.get('ty', 'g') # ge= 'general, au= 'author' request.session['q'] = q try: @@ -1908,7 +1912,7 @@ def search(request): page = 1 gbo = request.GET.get('gbo', 'n') # gbo is flag for google books only our_stuff = Q(is_free=True) | Q(campaigns__isnull=False) - if q != '' and page == 1 and not gbo == 'y': + if len(q) > 1 and page == 1 and not gbo == 'y': isbnq = ISBN(q) if isbnq.valid: work_query = Q(identifiers__value=str(isbnq), identifiers__type="isbn") From 128b8c2f4853130eb85a40f2436fb9e62ca8b0f3 Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 1 Feb 2018 15:19:06 -0500 Subject: [PATCH 2/4] url not guaranteed to be cleaned before provider --- frontend/forms/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/frontend/forms/__init__.py b/frontend/forms/__init__.py index 76479790..b563b279 100644 --- a/frontend/forms/__init__.py +++ b/frontend/forms/__init__.py @@ -154,12 +154,12 @@ class EbookForm(forms.ModelForm): new_label = self.data.get('new_version_label','') return new_label if new_label else self.cleaned_data['version_label'] - def clean_provider(self): + def set_provider(self): url = self.cleaned_data['url'] new_provider = Ebook.infer_provider(url) if url and not new_provider: raise forms.ValidationError(_("At this time, ebook URLs must point at Internet Archive, Wikisources, Wikibooks, Hathitrust, Project Gutenberg, raw files at Github, Google Books, or OApen.")) - return new_provider if new_provider else "Unglue.it" + self.cleaned_data['provider'] = new_provider if new_provider else "Unglue.it" def clean_url(self): url = self.cleaned_data['url'] @@ -170,6 +170,7 @@ class EbookForm(forms.ModelForm): raise forms.ValidationError(_("There's already an ebook with that url.")) def clean(self): + self.set_provider() format = self.cleaned_data.get('format', '') the_file = self.cleaned_data.get('file', None) url = self.cleaned_data.get('url', None) From 7e69ac95ebce4b783a2bb8830fb3db67a481fb41 Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 1 Feb 2018 21:47:29 -0500 Subject: [PATCH 3/4] add negative id facets --- core/facets.py | 16 ++++++++++++---- frontend/templates/facets/id.html | 6 ++++++ static/images/-doab32.png | Bin 0 -> 4426 bytes static/images/-gtbg32.png | Bin 0 -> 2396 bytes 4 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 static/images/-doab32.png create mode 100644 static/images/-gtbg32.png diff --git a/core/facets.py b/core/facets.py index 52d32f9a..c282455c 100644 --- a/core/facets.py +++ b/core/facets.py @@ -130,8 +130,10 @@ class FormatFacetGroup(FacetGroup): return "These eBooks available in %s format." % self.facet_name return FormatFacet -idtitles = {'doab': 'indexed in DOAB', 'gtbg':'available in Project Gutenberg'} -idlabels = {'doab': 'DOAB', 'gtbg':'Project Gutenberg'} +idtitles = {'doab': 'indexed in DOAB', 'gtbg':'available in Project Gutenberg', + '-doab': 'not in DOAB', '-gtbg':'not from Project Gutenberg', } +idlabels = {'doab': 'DOAB', 'gtbg':'Project Gutenberg', + '-doab': 'not DOAB', '-gtbg':'not Project Gutenberg'} class IdFacetGroup(FacetGroup): def __init__(self): super(FacetGroup,self).__init__() @@ -144,10 +146,16 @@ class IdFacetGroup(FacetGroup): def set_name(self): self.facet_name=facet_name def id_filter(query_set): - return query_set.filter(identifiers__type=facet_name) + if facet_name[0] == '-': + return query_set.exclude(identifiers__type=facet_name[1:]) + else: + return query_set.filter(identifiers__type=facet_name) model_filters = {} def get_query_set(self): - return self._get_query_set().filter(identifiers__type=self.facet_name) + if facet_name[0] == '-': + return self._get_query_set().exclude(identifiers__type=self.facet_name[1:]) + else: + return self._get_query_set().filter(identifiers__type=self.facet_name) def template(self): return 'facets/id.html' @property diff --git a/frontend/templates/facets/id.html b/frontend/templates/facets/id.html index 65307609..35db7af6 100644 --- a/frontend/templates/facets/id.html +++ b/frontend/templates/facets/id.html @@ -7,5 +7,11 @@ {% if facet.facet_name == 'doab' %} These books are included in the Directory of Open Access Books. This means that they have been peer-reviewed and are of interest to scholars. {% endif %} + {% if facet.facet_name == '-gtbg' %} + These books are not included in Project Gutenberg. + {% endif %} + {% if facet.facet_name == '-doab' %} + These books do not seem to be included in the Directory of Open Access Books. + {% endif %}

\ No newline at end of file diff --git a/static/images/-doab32.png b/static/images/-doab32.png new file mode 100644 index 0000000000000000000000000000000000000000..00b7a0e08942f67b5c25158434e36eb1708d237d GIT binary patch literal 4426 zcmY*dXE+>Mw;nC}=p{mo(aV_8qet(8AbN|zkWojBFo=@qL@%S4sL=^Yh-g9dAUKRJ zQKCnQ^2s^pe)rz}JZtZ@)_&LfuC;&eCBN-`EQ002O#rKxIkb1L5$h~(DIUh49u z_2$6yG15>5ycuTwaTAbwYMT210OWLk3=fcnWxfHJBaF>ZW_r3Zk3EpW_KqG7aA7pk z^9Bt7$f0F!TqGQ24?-iKxckVU<-va=WN!RFX($-r>?0I7I*!$A_l;=(Yn z0vQMdlJjP@XbSsGpyou%DQ)hqp6SL|R%J3KNBjib8G>5FdYc zlsy{a?!);H$^Y=E!hIflBRo+E4|mWXUV8@*Uz9u;{72}&>z_VR2&ey5a`*XnteXL$ ze{P^6!Z7H6xo=A4{$ynycq8C9%76G3MCATP{=eM6I&#oI;{T_ae^FR%x2liZbnzvP#Y-*9!Mn8mo!f$jJta;*y_#`J_mpkrj= z71|6MEkv|r>18>M6_+#WW%m50kCACR&3$6J+gPSh&gaJFBxjhE)f1Jeof{c^U1J=* zE!rY>S4?kCW2@rA?6rn)7<$>@Op!U#bj) zD&w2fB?iu)HN;DYh%+UY5|99VhsC$B>hd2+-;=!s0_YR*s~6O8ZEE~D>?N$$Yz%F6j7uE zf@u+DNbCu;A+vk<4$^uiIo<8)`|VR|ryIa@mKWerjKQBS7pfeoH|;U|c84=9PqCNK zbv&B6^Jibol!d4tY3Zjzx~KZ&aWwZ(hv25sqoZR|37zA;O~USY;4FRkENq0kF|Nnz zEYCq83tRKF%deQ3f2g>!M{vsg=-WhEzShhP=M;L~uUvJo6e=LDKl?IDCJ{d|2w!}Y zPwx@nP*sAF*Y2Tqrf?SDOdo*e1@+JEK_cq2?@hn5PN`I%Nzr>u2&khdjr=O;qK|52m$P=BF(iTbI#q5o3jZA!_HMzcoL&fb>^ z#tU;1g*IG-_)TtKGDP;MG?!VPU$yi`CeSk&DEF&OyhAWhgoZ2MF=2qKEFKF(mpNdw zovZRcSCt4bxrB0)z+iF6(QcFDpp=kJo2f(=kw%o$>IdGVXe6YYLU=FUAgQ1DVLYd= zu#)!11eHBk#{JkRkU)ctHsf||bZg*i&aa|5hC;ADXXgB#a3@l%wPV?-rpx-Mcu#^& zbPrdVA1YcyvLigHU2lk6mIhQ}1vkXBB`8n9YNrclsLXbrL(O#QyTrTAS2SQToMlu( z!#n)wI-s;bWgubA4jl>` zwOvv{G8;b2^3QpZg<;azuO7#w;XL-O6UJ<1SuKNp@!oBFt&Gir`WDa}HSsj& z(QYG}XI7P&oL~bI@LN%c6%!p8I?4TY19P<7?J)e&r=h0>S%m8cfYZ62=Qz*z>KF zKY~};)2s-To1eURSzYJc0KYzVBZ;(|r7S=ADBT;_>pB-axh%snseab0QRTl_G<9(W zY}_Oyow4o-+$G1Lv%J23bod+M0AGXpCAC-{~OefByUvn)6TzXib zwl=Dt;-H`6B>UP5t|DNhd8w`pFs5Mfh9&9QbiU;6l<+@N>y!!NT&2V@bcZ zemQVoHh}yt`*l{&#GXBpropTroX0Ijw$ArTf=bR~Xd^g};*f$z3g?=4o>n5b%byvV zPw3`w7^~kR2b`FB!!>mIuKMgRy2fJ*?_H^?UrTu>-8?gIyAHyxBSUJI#oT#UIlAgz z?`(Fg0#GcJ(j+SP%?{Nnk!SF|EcgDM6U$}~y4FuIB`+FC;j5pu%G9qfsf3Q?`hVic zM{VJ6@(#u2=Nl&u*cC}}>HTzRbt_Lo8VL;O1txN?9s3V_dn~wo%i%!P6lzm|Qc9k@ zHp?Y| zKQ#i`;Y6>D4wJOyQwNw*a?o8>8*kMP6(|bbH0{eDSwM_hL^t-VoJBBgWPk=vUER|- zCnQK{DSYqyS=2PfKvnw=j?m6wLKSuv6&az=bS=B&#T35_oL(IbNYUR^W|8w})V+8p zF&P5|WwoU}rLD~T58QvHl@HWnGj*TPS^WUoewJ6+pH@Oo^f2JB!LM+=J ztrc$c7F`d6^muU#kRg!omN49`cD+7nO{0_>z<1?nmxIO(L$vdbO*g4{ZhV|;7%bam zT!2ZGfFcP^e(alk7G*`GhTjO!3Ro$gFUnWMaMrD`7s*?nVC|WTU z%A2S@nUyE|GE_QIO5|YbUOV?+e2Xas(@?h*EoJ*tJ?~Up-RzV#);0av{3$yRKikYP zvKsMKjjp^<3GJk!YN%~G+B*CRfjt};p%qYk^yG*T1TG;xHp-J}1BjI}FlE8dp;?%{v1AjS&5unj(?qNfNKg zYE!zQkA=wz!F{eR^OmvW>Ie*-0X9!Io^7dLO^cFh!5Y8weh)HzP3C!bLTj;}K7fW_ z#aQagpw?7pT=Gxx9M~(Eu489H&tV#)^|@AD@R3GP!k+tDO?5FEcEk7H>O&tEw7w#Gw}^b(ITwL}Rht#UeyYPM*v;@#j0 zM_;5+(>?PBG@eKRQMO@du65n4#ytVPXK1o&wf9mzXG-<8?aAR_e8m(Yb2pxnQBv^` zu*fXUvFT+<64!ITAaBK77T=f4aKC`hWMRaSi~%1k-|qp#xs~&|{YPGfGD!#X=?QFR z-C_qSUH0J-?j)5o<+))GPuUYtaV=~iun??4y?1hYn||yDv}HAo@(P2=5EAIS?v*@x zJcv8x*&ri~SSD&oG`gmS-D^mo_l3k!w;74?RgBbKoZ5Qp+tC=*dzO^Uns||!179)kde5~ShbQp9tV;Y3i=ibIKmll0 z*e`b@*B`uj^^P++3tWfs&QM?b;u@7XBL8(vRl2&1L^(M7XT9N+@8*pPcuK zvcWP&@!5R+C60h=+qo{qhN%3mY*!uY*#}<6+pm;1-EOo#Sb{{Xg2N2x?mP9I>qt6V3_E#yONR55s z+~{m=k#cPgix)Xqo#gF3owN`8tak4QfC`r3a#rE)8!G|rF8x_-Woux<3V-(`_Ic-- z=Qu}N=Ba7B>yYX}EUY_Nd@-%BU#O*!|DK6;hcg#S#$YVXXPrbN z-(4NaR+WT%1+uN8AsWFc#&# zW5Ttk(Smyo*eJyE2EjAl5gl+^|2`{-lW=_`Z^6~z&kJ5U=vteeUt$;wRq)2s1|Dm2 z#Z?Y%u)SVcEU4QoN+}@_!~e^3Ny5Fa+2V36*hL5H;%8!X*7-e4-8c&dq!6J6jgLi< z2;CW=Gq5!_&a6ncy6#^+SO`0%LyRdXx^Mj&>Z)fnu0KY=@!Rf?Vc@z&!oHWZEm*}~ z#kCCGLuR&DWLAWpmpZ>8lWbi9Z#rMo3)e3w05+=T%DA`)`CL2w|T0A=8jdJ?Jk4lAmvs;ZVpg2;Ex`dKz~eV(d$GgB*cr z!PKE>AOr$IQO}W(wiwet?fjh{)Pqi^AmMO-e}9<&VVEz~9j<{uAmHkna81oae8eHz zMKYbpJVd4`{U-Sj55|q=O7*1BJ$=cL4PK&)uOD3x3f&O;JAU7j?n(Nu5}EeL7T+Lz z;{>h&Q-}ZM=C`6Ysz__9ryE~+gCDJd`i1w`Zjv)0=^t9i?Ic)<1w@9f!_t;s+et)?_*=Cr`5a{J~s zuOgME173lnhORENCU?r(g~(na9)i}^7k{)%D(DPxQy3fA2!9FQG^7Wf1 z-fc}?e|amT7qMJ47vvq}{XK}GhmTbk86AIG>avv8Ft_>R#mq;5q_MIs|Ev#Sl-%>8-CUMv#Kn zca;?sTpXB=_F~3%^t5&6Y-w(I5RgX@9ZH@GcaiWrcnUJ~yfVU}Pd)O&C_^CjT*cXx zbDFOsvI2L4gv2%{sW7(8p*$9+lIK(-ruTstvMCk@{goG7iuYhB z+}&@MG-T z&9M{N*27hgRJME*b7zc|+Ghh`;px5V*;gJem{^r}e|g59oBwc%)KuBHYtcv`oDdTC zJ`T)j4NxP*E7)1H(2Lt;c6fV+Wtm}^HkuZRH^DXlQztQl5}bso;=Z&5{WYCSPJt4~ z>XLZf&l74m+P!r#pZcEU?L4%+pfvN!HB}yLyu$K+cS1O8LjE45%|;^ht3ZqekKG^r zq#M+kQQqe>bh!%Nup=Q$Cr~zTs!^s36TCVni@3Pc&$l5g6RT)w?5dPPXSjp{mvkA< z63h3JdIY^ZhQU}9z;;P>C3%aMZ$lMqjr7b1XE_#8$%|#akJ=6-9-P_Sfi=(EyZAs{ zKNhbUCAgdu7tO{twUko(-*#s2&5r~p2(ps2CO))&Xk&|syDo9CWhuvc*!G|Hkb+6i zRJu^@-D)#02~Sxzzh`EK}{abdOn04#?F{ zHGqDOfmT{fy+^H1*Zat+a05Zy?G&uu*f67na9w91tc>XlcN=x}o~E@MjazR$Q4cZD5El)G1Y@^UmFM+J{p5s%YCX&+~A?W%a|w;70D zlhx;ZyqA|%-Tv)#jhM1_!L>=LRCdVZ6gCmJfA`|VGet@S{p8X3Xq8w&N&?~nO4Y>* z)n&g!4Qt>mgV}D^;yg~R)6ANymuA-k&r7Ff8javJWn?)K15#1@B~`NWVMfQO=mqPwiE z_ARffp;{&TFz>o}Zs-k+{z8;o!{YJfgXD z4ARwNq88MO2sk=*O8% zjoy#cN0YDWXg6xNDw;P(O%Cemr5}A0vaS~=DjGBE7#aU!#s)w9kUf%-`DV4jFYU3S ztaApoqQi&b#dXgLLg=WMrsrgwQ*OC0Tyy?4BjqKmX(irji92m!AhhymO_eR|Wc*=g zU8wYp*@XqWT_~nHr?_VQoes806s(=Jw6tUq?=T$#uq0<*H+($%W8=$XZeoQgHgdlF EFN^G6zyJUM literal 0 HcmV?d00001 From bec9d46dc499e1a39589a01389f85991287af3a7 Mon Sep 17 00:00:00 2001 From: eric Date: Thu, 1 Feb 2018 21:48:00 -0500 Subject: [PATCH 4/4] fix campaign execution --- settings/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/settings/common.py b/settings/common.py index 82daf4ab..9dab230a 100644 --- a/settings/common.py +++ b/settings/common.py @@ -329,6 +329,7 @@ USER_AGENT = "unglue.it.bot v0.0.1 " GLUEJAR_COMMISSION = 0.06 PREAPPROVAL_PERIOD = 365 # days to ask for in a preapproval PREAPPROVAL_PERIOD_AFTER_CAMPAIGN = 90 # if we ask for preapproval time after a campaign deadline +PAYPAL_GLUEJAR_EMAIL = 'info@ebookfoundation.org' #legacy code needs this # How many days we will try to collect on failed transactions until they are written off RECHARGE_WINDOW = 14